Skip to content

Commit

Permalink
Bumped shuffle AI app
Browse files Browse the repository at this point in the history
  • Loading branch information
frikky committed May 13, 2024
1 parent 36333b0 commit 284ce19
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 22 deletions.
21 changes: 17 additions & 4 deletions email/1.3.0/src/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -395,6 +395,10 @@ def parse_eml(self, filedata, extract_attachments=False):
"data": filedata,
}

# Encode the data as utf-8 if it's not base64
if not str(parsedfile["data"]).endswith("="):
parsedfile["data"] = parsedfile["data"].encode("utf-8")

return self.parse_email_file(parsedfile, extract_attachments)

def parse_email_file(self, file_id, extract_attachments=False):
Expand All @@ -413,8 +417,6 @@ def parse_email_file(self, file_id, extract_attachments=False):
"reason": "Couldn't get file with ID %s" % file_id
}

#print("PRE: ", file_path)

# Check if data is in base64 and decode it
# If it ends with = then it may be base64

Expand All @@ -434,6 +436,16 @@ def parse_email_file(self, file_id, extract_attachments=False):
else:
extract_attachments = False

# Replace raw newlines \\r\\n with actual newlines
# The data is a byte string, so decode it as utf-8 to a str, do the replacement, then re-encode it to bytes
try:
print("Pre size: %d" % len(file_path["data"]))
file_path["data"] = file_path["data"].decode("utf-8").replace("\\r\\n", "\n").encode("utf-8")
print("Post size: %d" % len(file_path["data"]))
except Exception as e:
print(f"Failed to decode file: {e}")
pass

# Makes msg into eml
if ".msg" in file_path["filename"] or "." not in file_path["filename"]:
print(f"[DEBUG] Working with .msg file {file_path['filename']}. Filesize: {len(file_path['data'])}")
Expand All @@ -448,6 +460,7 @@ def parse_email_file(self, file_id, extract_attachments=False):
if ".msg" in file_path["filename"]:
return {"success":False, "reason":f"Exception occured during msg parsing: {e}"}


ep = eml_parser.EmlParser(
include_attachment_data=True,
include_raw_body=True
Expand All @@ -456,8 +469,8 @@ def parse_email_file(self, file_id, extract_attachments=False):
try:
print("Pre email")
parsed_eml = ep.decode_email_bytes(file_path['data'])
if str(parsed_eml["header"]["date"]) == "1970-01-01 00:00:00+00:00" and len(parsed_eml["header"]["subject"]) == 0:
return {"success":False,"reason":"Not a valid EML/MSG file, or the file have a timestamp or subject defined (required).", "date": str(parsed_eml["header"]["date"]), "subject": str(parsed_eml["header"]["subject"])}
#if str(parsed_eml["header"]["date"]) == "1970-01-01 00:00:00+00:00" and len(parsed_eml["header"]["subject"]) == 0:
# return {"success":False,"reason":"Not a valid EML/MSG file, or the file have a timestamp or subject defined (required).", "date": str(parsed_eml["header"]["date"]), "subject": str(parsed_eml["header"]["subject"])}

# Put attachments in the shuffle file system
print("Pre attachment")
Expand Down
4 changes: 2 additions & 2 deletions shuffle-ai/1.0.0/upload.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@

gcloud run deploy shuffle-ai-1-0-0 \
--region=europe-west2 \
--max-instances=3 \
--max-instances=5 \
--set-env-vars=SHUFFLE_APP_EXPOSED_PORT=8080,SHUFFLE_SWARM_CONFIG=run,SHUFFLE_LOGS_DISABLED=true --source=./ \
--timeout=1800s
--timeout=300s
47 changes: 31 additions & 16 deletions shuffle-tools/1.2.0/src/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,29 +94,44 @@ def base64_conversion(self, string, operation):
return value

elif operation == "decode":
decoded_bytes = ""

if "-" in string:
string = string.replace("-", "+", -1)

if "_" in string:
string = string.replace("_", "/", -1)

# Fix padding
if len(string) % 4 != 0:
string += "=" * (4 - len(string) % 4)


# For loop this. It's stupid.
decoded_bytes = ""
try:
decoded_bytes = base64.b64decode(string)
except Exception as e:
if "incorrect padding" in str(e).lower():
try:
decoded_bytes = base64.b64decode(string + "=")
except Exception as e:
if "incorrect padding" in str(e).lower():
try:
decoded_bytes = base64.b64decode(string + "==")
except Exception as e:
if "incorrect padding" in str(e).lower():
try:
decoded_bytes = base64.b64decode(string + "===")
except Exception as e:
if "incorrect padding" in str(e).lower():
return "Invalid Base64"
return json.dumps({
"success": False,
"reason": "Invalid Base64 - %s" % e,
})

#if "incorrect padding" in str(e).lower():
# try:
# decoded_bytes = base64.b64decode(string + "=")
# except Exception as e:
# if "incorrect padding" in str(e).lower():
# try:
# decoded_bytes = base64.b64decode(string + "==")
# except Exception as e:
# if "incorrect padding" in str(e).lower():
# try:
# decoded_bytes = base64.b64decode(string + "===")
# except Exception as e:
# if "incorrect padding" in str(e).lower():
# return "Invalid Base64"


decoded_bytes = base64.b64decode(string)
try:
decoded_bytes = str(decoded_bytes, "utf-8")
except:
Expand Down

0 comments on commit 284ce19

Please sign in to comment.