forked from MISP/misp-modules
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathemail_import.py
292 lines (243 loc) · 11.7 KB
/
email_import.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import json
import base64
import zipfile
import re
from html.parser import HTMLParser
from pymisp.tools import EMailObject, make_binary_objects
try:
from pymisp.tools import URLObject
except ImportError:
raise ImportError('Unable to import URLObject, pyfaup missing')
from io import BytesIO
from pathlib import Path
# Module-level metadata returned by introspection() and version().
misperrors = {'error': 'Error'}

mispattributes = {
    'inputSource': ['file'],
    'output': ['MISP objects'],
    'format': 'misp_standard',
}

moduleinfo = {
    'version': '0.2',
    'author': 'Seamus Tuohy, Raphaël Vinot',
    'description': 'Email import module for MISP',
    'module-type': ['import'],
}

# Configuration options understood by handler():
#   unzip_attachments              - unzip every zip attachment that is not
#                                    password protected
#   guess_zip_attachment_passwords - try to unzip password-protected zip files
#                                    using the strings found in the e-mail body
#                                    and subject as candidate passwords
#   extract_urls                   - extract the URLs found in the e-mail body
moduleconfig = [
    "unzip_attachments",
    "guess_zip_attachment_passwords",
    "extract_urls",
]
# Don't be picky about how the user chooses to say yes to an option.
_ACCEPTABLE_CONFIG_YES = frozenset(('y', 'yes', 'true', 't'))

# Suffixes of document formats that are zip containers themselves; these are
# imported as a single file instead of being unzipped.
_ZIP_BASED_DOCUMENT_SUFFIXES = frozenset((
    "doc", "docx", "dot", "dotx", "xls", "xlsx", "xlm", "xla",
    "xlc", "xlt", "xltx", "xlw", "ppt", "pptx", "pps", "ppsx",
    "pot", "potx", "sldx", "odt", "ods", "odp", "odg",
    "odf", "fodt", "fods", "fodp", "fodg", "ott", "uot",
))


def _config_enabled(config, key):
    """Return True only when option *key* is explicitly switched on."""
    value = config.get(key)
    if isinstance(value, bool):
        return value
    return isinstance(value, str) and value.strip().lower() in _ACCEPTABLE_CONFIG_YES


def _add_raw_attachment(email_object, file_objects, attachment_name, attachment, comment=None):
    """Add an attachment as-is (not unzipped) and reference it from the e-mail."""
    f_object, main_object, sections = make_binary_objects(pseudofile=attachment, filename=attachment_name, standalone=False)
    if comment is not None:
        f_object.comment = comment
    file_objects.append(f_object)
    file_objects.append(main_object)
    file_objects += sections
    email_object.add_reference(f_object.uuid, 'includes', 'Email attachment')


def _decode_part(part):
    """Return the decoded text of a message part ('' when it has no payload)."""
    payload = part.get_payload(decode=True)
    if not payload:
        return ''
    try:
        return payload.decode(part.get_content_charset('utf-8'), errors='ignore')
    except LookupError:
        # The declared charset is unknown to Python; fall back to UTF-8.
        return payload.decode('utf-8', errors='ignore')


def handler(q=False):
    """Import an e-mail and return the objects built from it.

    Args:
        q: JSON string of the request. Its "data" field holds the base64
            encoded e-mail; the optional "config" field holds the options
            listed in ``moduleconfig``. ``False`` when no request is given.

    Returns:
        ``False`` when ``q`` is ``False``; otherwise a dict of the form
        ``{'results': {'Object': [...]}}`` whose list starts with the e-mail
        object, followed by the attachment and URL objects.
    """
    if q is False:
        return False

    # Decode and parse email
    request = json.loads(q)
    # request data is always base 64 byte encoded
    data = base64.b64decode(request["data"])
    email_object = EMailObject(pseudofile=BytesIO(data), attach_original_email=True, standalone=False)

    # A missing or null configuration means "everything off".
    config = request.get("config") or {}
    # Each option becomes a real boolean so that values such as "no" or
    # "false" cannot accidentally switch a feature on.
    unzip = _config_enabled(config, "unzip_attachments")
    zip_pass_crack = _config_enabled(config, "guess_zip_attachment_passwords")
    extract_urls = _config_enabled(config, "extract_urls")
    password_list = get_zip_passwords(email_object.email) if zip_pass_crack else []

    file_objects = []  # All possible file objects
    for attachment_name, attachment in email_object.attachments:
        if not attachment_name:
            attachment_name = 'NameMissing.txt'
        suffix = Path(attachment_name).suffix[1:].lower()
        if not unzip or suffix in _ZIP_BASED_DOCUMENT_SUFFIXES:
            # Just straight add the file
            _add_raw_attachment(email_object, file_objects, attachment_name, attachment)
            continue

        # Attempt to unzip the attachment and add the files it contains
        try:
            unzip_attachement(attachment_name, attachment, email_object, file_objects)
        except RuntimeError:  # File is encrypted with a password
            if not zip_pass_crack:
                # Keep the attachment itself so it does not vanish from the results.
                _add_raw_attachment(email_object, file_objects, attachment_name, attachment,
                                    comment="Encrypted Zip: Password guessing is disabled")
                continue
            password = test_zip_passwords(attachment, password_list)
            if password:
                try:
                    unzip_attachement(attachment_name, attachment, email_object, file_objects, password)
                    continue
                except (RuntimeError, zipfile.BadZipFile):
                    # The guessed password did not open every member;
                    # fall through and import the archive unopened.
                    pass
            # Inform the analyst that we could not crack password
            _add_raw_attachment(email_object, file_objects, attachment_name, attachment,
                                comment="Encrypted Zip: Password could not be cracked from message")
        except zipfile.BadZipFile:  # Attachment is not a zipfile
            _add_raw_attachment(email_object, file_objects, attachment_name, attachment)

    mail_body = email_object.email.get_body(preferencelist=('html', 'plain'))
    if extract_urls and mail_body:
        body_text = _decode_part(mail_body)
        if mail_body.get_content_type() == 'text/html':
            url_parser = HTMLURLParser()
            url_parser.feed(body_text)
            urls = url_parser.urls
        else:
            urls = re.findall(r'https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+', body_text)
        for url in urls:
            if not url:
                continue
            url_object = URLObject(url, standalone=False)
            file_objects.append(url_object)
            email_object.add_reference(url_object.uuid, 'includes', 'URL in email body')

    objects = [email_object.to_json()]
    if file_objects:
        objects += [o.to_json() for o in file_objects if o]
    r = {'results': {'Object': [json.loads(o) for o in objects]}}
    return r
def unzip_attachement(filename, data, email_object, file_objects, password=None):
    """Extract the contents of a zipfile into file objects.

    Every regular file in the archive is read first; only when all of them
    could be read are the resulting objects appended to ``file_objects`` and
    referenced from ``email_object``. A failure therefore leaves both
    untouched, so the caller can safely retry with a password.

    Args:
        filename (str): Name of the zip file, used in the object comment.
        data (decoded attachment data): Seekable binary file object holding
            the zip archive.
        email_object: E-mail object; ``add_reference`` is called on it once
            per extracted file.
        file_objects (list): List extended in place with the created objects.
        password (str): Optional password used to decrypt the members.

    Returns:
        None. The results are appended to ``file_objects``.

    Raises:
        zipfile.BadZipFile: ``data`` is not a valid zip archive.
        RuntimeError: Raised by ``zipfile`` when a member is encrypted and
            the password is missing or wrong.
    """
    if password is not None:
        comment = f'Extracted from {filename} with password "{password}"'
        pwd = str.encode(password)  # Byte encoded password required
    else:
        comment = f'Extracted from {filename}'
        pwd = None

    extracted = []
    with zipfile.ZipFile(data, "r") as zf:
        for member in zf.infolist():  # Get all files in the zip file
            if member.is_dir():
                # Directory entries carry no data of their own.
                continue
            with zf.open(member, mode='r', pwd=pwd) as fp:
                extracted.append((member.filename, BytesIO(fp.read())))

    for zip_file_name, file_data in extracted:
        f_object, main_object, sections = make_binary_objects(pseudofile=file_data,
                                                              filename=zip_file_name,
                                                              standalone=False)
        f_object.comment = comment
        file_objects.append(f_object)
        file_objects.append(main_object)
        file_objects += sections
        email_object.add_reference(f_object.uuid, 'includes', 'Email attachment')
def test_zip_passwords(data, test_passwords):
    """Test passwords until one is found to be correct.

    The first encrypted regular file of the archive is used as the probe. A
    candidate only counts as correct when that member can be read to the end,
    because the quick header check done by ``zipfile`` on open accepts a
    wrong password now and then.

    Args:
        data (decoded attachment data): Seekable binary file object holding
            the zip archive.
        test_passwords (array): List of strings to test as passwords.

    Returns:
        The matching password as a string, or None when no candidate matches
        or the archive holds no encrypted file.
    """
    import zlib  # only needed to recognise a failed decryption

    with zipfile.ZipFile(data, "r") as zf:
        # Bit 0 of the general purpose flags marks an encrypted member.
        probe = next((info for info in zf.infolist()
                      if info.flag_bits & 0x1 and not info.is_dir()), None)
        if probe is None:
            return None
        for pw_test in test_passwords:
            try:
                with zf.open(probe, pwd=str.encode(pw_test)) as fp:
                    # Read in chunks: decrypting garbage fails in the
                    # decompressor or at the final CRC check.
                    while fp.read(65536):
                        pass
            except (RuntimeError, zipfile.BadZipFile, zlib.error):  # Incorrect Password
                continue
            return pw_test
    return None


# The name starts with "test_"; keep pytest from collecting this helper as a
# test case when the module is imported into a test file.
test_zip_passwords.__test__ = False
def get_zip_passwords(message):
    """Parse message for possible zip password combinations.

    The candidates are, in this order: passwords commonly used for malware
    samples, commonly used passwords, every string of the body and subject
    that is enclosed in quotes, brackets or parentheses, and every
    whitespace-separated word (both as written and with surrounding basic
    punctuation stripped).

    Args:
        message (email.message): Email message object to parse.

    Returns:
        List of unique, non-empty candidate password strings.
    """
    possible_passwords = [
        # Passwords commonly used for malware
        "infected", "malware",
        # Commonly used passwords
        "123456", "password", "12345678", "qwerty",
        "abc123", "123456789", "111111", "1234567",
        "iloveyou", "adobe123", "123123", "sunshine",
        "1234567890", "letmein", "1234", "monkey",
        "shadow", "sunshine", "12345", "password1",
        "princess", "azerty", "trustno1", "000000",
    ]

    # Not checking for multi-part message because by having an
    # encrypted zip file it must be multi-part.
    body = []
    for part in message.walk():
        content_type = part.get_content_type()
        if content_type not in ('text/plain', 'text/html'):
            continue
        payload = part.get_payload(decode=True)
        if payload is None:
            continue
        charset = part.get_content_charset() or "utf-8"
        try:
            text = payload.decode(charset, errors='ignore')
        except LookupError:
            # The declared charset is unknown to Python; fall back to UTF-8.
            text = payload.decode("utf-8", errors='ignore')
        if content_type == 'text/plain':
            body.append(text)
        else:
            html_parser = HTMLTextParser()
            html_parser.feed(text)
            body.extend(html_parser.text_data)
    raw_text = "\n".join(body).strip()

    # Add subject to text corpus to parse
    subject = message.get('Subject')
    if subject is not None:
        # str() because some policies return a header object, not a str.
        raw_text += " " + str(subject)

    # Grab any strings that are marked off by special chars
    marking_chars = [["\'", "\'"], ['"', '"'], ['[', ']'], ['(', ')']]
    for opening, closing in marking_chars:
        regex = re.compile(r"""\{0}([^\{1}]*)\{1}""".format(opening, closing))
        possible_passwords += regex.findall(raw_text)

    # Every word is a candidate as well
    individual_words = re.split(r"\s", raw_text)
    # Also get words with basic punctuation stripped out
    # just in case someone places a password in a proper sentence
    stripped_words = [word.strip('.,;:?!') for word in individual_words]
    possible_passwords += individual_words
    possible_passwords += stripped_words

    # Drop duplicates (keeping the first occurrence, so the likeliest
    # candidates stay in front) and empty strings, which can never match.
    return [candidate for candidate in dict.fromkeys(possible_passwords) if candidate]
class HTMLTextParser(HTMLParser):
    """Collect every piece of text data found in the HTML fed to the parser.

    The pieces are appended to ``text_data`` in document order. A list can
    be handed in to collect into; otherwise a fresh one is created.
    """

    def __init__(self, text_data=None):
        super().__init__()
        self.text_data = [] if text_data is None else text_data

    def handle_data(self, data):
        # Called by HTMLParser for each run of text between tags.
        self.text_data.append(data)
class HTMLURLParser(HTMLParser):
    """Parse all link and image targets from HTML strings.

    The ``href`` of every ``<a>`` tag and the ``src`` of every ``<img>`` tag
    are appended to ``urls`` in document order. Tags that lack the attribute
    (or have it empty) are skipped, so ``urls`` only ever holds strings. A
    list can be handed in to collect into; otherwise a fresh one is created.
    """

    # Attribute that carries the URL, per tag of interest.
    _URL_ATTRIBUTE = {'a': 'href', 'img': 'src'}

    def __init__(self, urls=None):
        HTMLParser.__init__(self)
        self.urls = [] if urls is None else urls

    def handle_starttag(self, tag, attrs):
        attribute = self._URL_ATTRIBUTE.get(tag)
        if attribute is None:
            return
        url = dict(attrs).get(attribute)
        if url:
            self.urls.append(url)
def introspection():
    """Return the module's ``mispattributes`` dict (input source, output, format)."""
    return mispattributes
def version():
    """Return ``moduleinfo`` after storing ``moduleconfig`` under its 'config' key."""
    moduleinfo.update(config=moduleconfig)
    return moduleinfo
if __name__ == '__main__':
    # Manual smoke test. handler() expects a JSON request whose "data" field
    # is the base64 encoded e-mail, so the raw file has to be wrapped first.
    with open('tests/test_no_attach.eml', 'rb') as email_file:
        raw_email = email_file.read()
    request = json.dumps({'data': base64.b64encode(raw_email).decode('ascii')})
    print(json.dumps(handler(q=request), indent=2))