Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

AWS parser update #244

Open
wants to merge 8 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 68 additions & 18 deletions circuit_maintenance_parser/parsers/aws.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""AquaComms parser."""
"""AWS parser."""
import hashlib
import logging
import quopri
Expand All @@ -13,6 +13,7 @@
# pylint: disable=too-many-nested-blocks, too-many-branches

logger = logging.getLogger(__name__)
#logger.setLevel("DEBUG")

gskjelstad marked this conversation as resolved.
Show resolved Hide resolved

class SubjectParserAWS1(EmailSubjectParser):
Expand All @@ -23,10 +24,25 @@ def parse_subject(self, subject):

Example: AWS Direct Connect Planned Maintenance Notification [AWS Account: 00000001]
"""
data = {}
search = re.search(r"\[AWS Account ?I?D?: ([0-9]+)\]", subject)
if search:
data["account"] = search.group(1)
data = {"account": ""}
# Common Subject strings for matching:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we can't find the "account", we should skip adding the key with an empty value. It's better to fail clearly than have a result like the one in tests/unit/data/aws/aws3_subject_parser_result.json

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess that's the rub in this case. Some of the emails have the account number in the subject line, while others don't (it's in the body). If I just skip adding an empty key, there is no other data that's parsed from the subject line, so it returns an empty data structure, [{}]. This breaks the upstream parser, though, as it considers the parsing to have failed rather than treating it as an expected empty response. Maybe I'm not understanding it clearly, though - what would be good practice here when you are EXPECTING there to be no data under certain conditions?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The Providers (e.g., aws) contain different Processors, which are a combination of one or more Parsers.
The way the circuit maintenance parser works is to try to extract part of the Maintenance data from each Parser and then finally combine the data to create a valid Maintenance.
This means that if some Processor has a Parser that is not capturing the proper data, it's OK; another one could do so in the same process.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Alright, cool. After incorporating the feedback you gave me, I think we're probably ready for you to give this another look. Let me know what you think! 👍

subject_map = {
"\[AWS Account ?I?D?: ([0-9]+)\]": "account",
}

regex_keys = re.compile("|".join(subject_map), re.IGNORECASE)

# in case of a multi-line subject
# match the subject map
for line in subject.splitlines():
line_matched = re.search(regex_keys, line)
if not line_matched:
continue
for group_match in line_matched.groups():
if group_match is not None:
for k, v in subject_map.items():
gskjelstad marked this conversation as resolved.
Show resolved Hide resolved
if re.search(k, line, re.IGNORECASE):
data[v] = group_match
return [data]


Expand Down Expand Up @@ -60,29 +76,63 @@ def parse_text(self, text):
This maintenance is scheduled to avoid disrupting redundant connections at =
the same time.
"""
data = {"circuits": []}
text_map = {
"^Account ?I?D?: ([0-9]+)": "account",
"^Start Time: ([A-Z][a-z]{2}, [0-9]{1,2} [A-Z][a-z]{2,9} [0-9]{4} [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{2,3})": "start",
"^End Time: ([A-Z][a-z]{2}, [0-9]{1,2} [A-Z][a-z]{2,9} [0-9]{4} [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{2,3})": "end",
"(?<=from )([A-Z][a-z]{2}, [0-9]{1,2} [A-Z][a-z]{2,9} [0-9]{4} [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{2,3}) to ([A-Z][a-z]{2}, [0-9]{1,2} [A-Z][a-z]{2,9} [0-9]{4} [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{2,3})": "start_and_end",
}

regex_keys = re.compile("|".join(text_map), re.IGNORECASE)

data = {"circuits": [], "start": "", "end": ""}
impact = Impact.OUTAGE
gskjelstad marked this conversation as resolved.
Show resolved Hide resolved
maintenace_id = ""
status = Status.CONFIRMED

for line in text.splitlines():
if "planned maintenance" in line.lower():
data["summary"] = line
search = re.search(
r"([A-Z][a-z]{2}, [0-9]{1,2} [A-Z][a-z]{2,9} [0-9]{4} [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{2,3}) to ([A-Z][a-z]{2}, [0-9]{1,2} [A-Z][a-z]{2,9} [0-9]{4} [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{2,3})",
line,
)
if search:
data["start"] = self.dt2ts(parser.parse(search.group(1)))
data["end"] = self.dt2ts(parser.parse(search.group(2)))
maintenace_id += str(data["start"])
maintenace_id += str(data["end"])
# match against the regex strings
line_matched = re.search(regex_keys, line)
# if we have a string that's not in our text_map
# there may still be some strings with data to capture.
# otherwise, continue on.
if not line_matched:
if "may become unavailable" in line.lower():
impact = Impact.OUTAGE
elif "has been cancelled" in line.lower():
status = Status.CANCELLED
elif re.match(r"[a-z]{5}-[a-z0-9]{8}", line):
maintenace_id += line
data["circuits"].append(CircuitImpact(circuit_id=line, impact=impact))

if re.match(r"[a-z]{5}-[a-z0-9]{8}", line):
maintenace_id += line
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just curious: why haven't you added this regex to the others?

data["circuits"].append(CircuitImpact(circuit_id=line, impact=impact))
continue

# for lines that do match our regex strings.
# grab the data and map the values to keys.
for group_match in line_matched.groups():
if group_match is not None:
for k, v in text_map.items():
gskjelstad marked this conversation as resolved.
Show resolved Hide resolved
if re.search(k, line_matched.string, re.IGNORECASE):
# Due to having a single line on some emails
# This causes multiple match groups
# However this needs to be split across keys.
# This could probably be cleaned up.
if v == "start_and_end" and data["start"] == "":
data["start"] = group_match
elif v == "start_and_end" and data["end"] == "":
data["end"] = group_match
else:
data[v] = group_match

# Let's get our times in order.
if data["start"] and data["end"]:
data["start"] = self.dt2ts(parser.parse(data["start"]))
gskjelstad marked this conversation as resolved.
Show resolved Hide resolved
data["end"] = self.dt2ts(parser.parse(data["end"]))
maintenace_id += str(data["start"])
maintenace_id += str(data["end"])

# No maintenance ID found in emails, so a hash value is being generated using the start,
# end and IDs of all circuits in the notification.
data["maintenance_id"] = hashlib.md5(maintenace_id.encode("utf-8")).hexdigest() # nosec
Expand Down
119 changes: 119 additions & 0 deletions tests/unit/data/aws/aws3.eml
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
Delivered-To: [email protected]
Received: by 2002:a05:7300:a498:b0:db:5402:54d2 with SMTP id ci24csp3189966dyb;
Wed, 27 Sep 2023 23:45:14 -0700 (PDT)
X-Received: by 2002:a05:6830:1b64:b0:6c4:ded2:44d0 with SMTP id d4-20020a0568301b6400b006c4ded244d0mr393870ote.27.1695883513842;
Wed, 27 Sep 2023 23:45:13 -0700 (PDT)
ARC-Seal: i=3; a=rsa-sha256; t=1695883513; cv=pass;
d=google.com; s=arc-20160816;
b=J2AUxyAHbkdx5YmP5xAbEeJM3elEoa14Iwv5t4wjz1RTKLla7KfqHxEZMD1LYoenxs
ZLpE/YDGT+ZSphagfW1mo+veHY27kppDSD00YOjDWdWqOLNUvH6KDvGlkMIEIKATDfI/
5lWcAOTP2h5x7kha2YFpModQRq/fL2727THiXX+BTTWi6r1kF0IAO8lcivXAuM8jaZtM
DYgJAzQW/hcpexKfXz5idvB7cM4TA6+EtsbntwuTILFG6QuY6l09nQOLSnaiDC9WpxXk
Nnveuzzzkx4XklLFCn6AaangILYMa/Ac9kfcOT6MBwOHngH27Pj06PjxF5x7Otk/VLtD
NTqA==
ARC-Message-Signature: i=3; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=arc-20160816;
h=list-unsubscribe:list-archive:list-help:list-post:list-id
:mailing-list:precedence:feedback-id:content-transfer-encoding
:mime-version:subject:message-id:to:from:date:sender:dkim-signature;
bh=YXHwVUeIH05Eff5uu/QBh14WkQXPczhkl0K2x/xaxHI=;
fh=/BZp/CoWS56RYHJk/Nq7+rYFbsXKZMYT25P0V/4q4R8=;
b=El2IxnqZD03wQKR3T3OgYD9VZmQlP/4/0F/G9rkTXGnpqdk6LMM7wtFngfyND14s2/
j49qP6rvxFdU2YNPbm6K0v6UplLq6sq57eE8kKVJ9qIu4hebNR3r75ohqPRwU7rzvoGB
mPjuE2UevG9YfswuQ8/gDeLbBFAP9gyj6BMwYnC9uQRwdy1fYLMizPxPP/WxV+RLbY6C
s7UzQudntDwdE+hFpKHx8mUMrx+4AN/PU4eYyuGjXSrT8qb5LWCC6xFmIIN5LluDMGfO
VNXZBnDMf6paqOUk6SdRDDGR60D5dTR+KMXvc5ilbgmErZDLGMDqlm93ku65R3nnFot2
OmzA==
ARC-Authentication-Results: i=3; mx.google.com;
dkim=pass [email protected] header.s=testdomain header.b=GLHcVmBa;
arc=pass (i=2 spf=pass spfdomain=us-west-2.amazonses.com dkim=pass dkdomain=sns.amazonaws.com dkim=pass dkdomain=amazonses.com dmarc=pass fromdomain=amazonaws.com);
spf=pass (google.com: domain of [email protected] designates 7.7.7.7 as permitted sender) smtp.mailfrom=rd-notices+bncBCSJVSHO64CBB6OB2SUAMGQE3DQKCSQ@testdomain.com;
dmarc=fail (p=QUARANTINE sp=NONE dis=NONE arc=pass) header.from=amazonaws.com
Return-Path: <[email protected]>
Received: from mail-sor-f69.google.com (mail-sor-f69.google.com. [7.7.7.7)
by mx.google.com with SMTPS id z195-20020a4a49cc000000b0057b8079d2f9sor1288663ooa.3.2023.09.27.23.45.13
for <[email protected]>
(Google Transport Security);
Wed, 27 Sep 2023 23:45:13 -0700 (PDT)
Received-SPF: pass (google.com: domain of [email protected] designates 7.7.7.7 as permitted sender) client-ip=7.7.7.7;
Sender: [email protected]
X-Gm-Message-State: AOJu0YwA1ncYCL1JhSr58XiTdslkwS2bbAyUG8XhiJs3xZZJ3Ccy9WF5
b8y79QbLjF9OquocCHSQC0PxicdI
X-Google-Smtp-Source: AGHT+IHRVxhXNJLs7Sr7hKiGQj5axz7trO3ifhk17zVerbtpqBwzCR3N9tJiSMksqUUrB6MOLmrSLg==
X-Received: by 2002:a4a:d138:0:b0:57e:1618:e700 with SMTP id n24-20020a4ad138000000b0057e1618e700mr147723oor.7.1695883513473;
Wed, 27 Sep 2023 23:45:13 -0700 (PDT)
X-BeenThere: [email protected]
Received: by 2002:a4a:554d:0:b0:573:f543:8c29 with SMTP id e74-20020a4a554d000000b00573f5438c29ls1795866oob.1.-pod-prod-01-us;
Wed, 27 Sep 2023 23:45:12 -0700 (PDT)
X-Received: by 2002:a54:4002:0:b0:3a7:8725:f37c with SMTP id x2-20020a544002000000b003a78725f37cmr391884oie.10.1695883512779;
Wed, 27 Sep 2023 23:45:12 -0700 (PDT)
Received: from a59-201.smtp-out.us-west-2.amazonses.com (a59-201.smtp-out.us-west-2.amazonses.com. [7.7.7.7])
by mx.google.com with ESMTPS id f20-20020a637554000000b00578b785d46csi18216323pgn.193.2023.09.27.23.45.12
for <[email protected]>
(version=TLS1_2 cipher=ECDHE-ECDSA-AES128-GCM-SHA256 bits=128/128);
Wed, 27 Sep 2023 23:45:12 -0700 (PDT)
Date: Thu, 28 Sep 2023 06:45:12 +0000
From: DXMaintNotify-RealDirect <[email protected]>
To: [email protected]
Message-ID: <0101018ada88c9ab-7bb959a5-dfa6-4e9b-9fa1-787fe83442c6-000000@us-west-2.amazonses.com>
Subject: [rd-notices] AWS_DIRECTCONNECT_MAINTENANCE_SCHEDULED
MIME-Version: 1.0
Content-Type: text/plain; charset="UTF-8"
Content-Transfer-Encoding: quoted-printable
x-amz-sns-message-id: c08baa17-4211-5fca-a32c-f79861293c18
x-amz-sns-subscription-arn: arn:aws:sns:us-west-2:860000000000:DXMaintNotify:9e02f42f-b026-4bd7-bb9b-5d1eb2b2e141
Feedback-ID: 1.us-west-2.c55J8LO2Yl1R0Ht+ysI6VjzUH6Cvo3dHPF80AUVC/G8=:AmazonSES
X-SES-Outgoing: 2023.09.28-54.240.59.201
X-Original-Sender: [email protected]
Precedence: list
Mailing-list: list [email protected]; contact [email protected]
List-ID: <rd-notices.testdomain.com>
X-Spam-Checked-In-Group: [email protected]
X-Google-Group-Id: 536184160288
List-Post: <https://groups.google.com/a/testdomain.com/group/rd-notices/post>, <mailto:[email protected]>
List-Help: <https://support.google.com/a/testdomain.com/bin/topic.py?topic=25838>,
<mailto:[email protected]>
List-Archive: <https://groups.google.com/a/testdomain.com/group/rd-notices/>
List-Unsubscribe: <mailto:[email protected]>,
<https://groups.google.com/a/testdomain.com/group/rd-notices/subscribe>

Planned maintenance has been scheduled on an AWS Direct Connect endpoint in=
Westin Building Exchange, Seattle, WA. During this maintenance window, you=
r AWS Direct Connect services associated with this event may become unavail=
able.\n\nThis maintenance is scheduled to avoid disrupting redundant connec=
tions at the same time.\n\nIf you encounter any problems with your connecti=
on after the end of this maintenance window, please contact AWS Support(1).=
\n\n(1) https://aws.amazon.com/support. For more details, please see https:=
//phd.aws.amazon.com/phd/home?region=3Dus-west-2#/dashboard/open-issues

Region: us-west-2
Account Id: 0000000000001

Affected Resources:
xxxxx-ffffffff
yyyyy-uuuuuuuu
mmmmm-iiiiiiii
rrrrr-pppppppp
fffff-qqqqqqqq

Start Time: Thu, 12 Oct 2023 07:00:00 GMT
End Time: Thu, 12 Oct 2023 13:00:00 GMT

--
If you wish to stop receiving notifications from this topic, please click o=
r visit the link below to unsubscribe:
https://sns.us-west-2.amazonaws.com/unsubscribe.html?SubscriptionArn=3Darn:=
aws:sns:us-west-2:860000000000:DXMaintNotify:9e02f42f-b026-4bd7-bb9b-5d1eb2=
b2e141&[email protected]

Please do not reply directly to this email. If you have any questions or co=
mments regarding this email, please contact us at https://aws.amazon.com/su=
pport

--=20
You received this message because you are subscribed to the Google Groups "=
Real Direct Notices" group.
To unsubscribe from this group and stop receiving emails from it, send an e=
mail to [email protected].
To view this discussion on the web visit https://groups.google.com/a/Realga=
mes.com/d/msgid/rd-notices/0101018ada88c9ab-7bb959a5-dfa6-4e9b-9fa1-787fe83=
442c6-000000%40us-west-2.amazonses.com.
37 changes: 37 additions & 0 deletions tests/unit/data/aws/aws3_result.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
[
{
"account": "0000000000001",
"circuits": [
{
"circuit_id": "xxxxx-ffffffff",
"impact": "OUTAGE"
},
{
"circuit_id": "yyyyy-uuuuuuuu",
"impact": "OUTAGE"
},
{
"circuit_id": "mmmmm-iiiiiiii",
"impact": "OUTAGE"
},
{
"circuit_id": "rrrrr-pppppppp",
"impact": "OUTAGE"
},
{
"circuit_id": "fffff-qqqqqqqq",
"impact": "OUTAGE"
}
],
"end": 1697115600,
"maintenance_id": "b15bf3344836f5ad8ab6a6e16cf328f8",
"organizer": "[email protected]",
"provider": "aws",
"sequence": 1,
"stamp": 1695883512,
"start": 1697094000,
"status": "CONFIRMED",
"summary": "Planned maintenance has been scheduled on an AWS Direct Connect endpoint in Westin Building Exchange, Seattle, WA. During this maintenance window, your AWS Direct Connect services associated with this event may become unavailable.\\n\\nThis maintenance is scheduled to avoid disrupting redundant connections at the same time.\\n\\nIf you encounter any problems with your connection after the end of this maintenance window, please contact AWS Support(1).\\n\\n(1) https://aws.amazon.com/support. For more details, please see https://phd.aws.amazon.com/phd/home?region=us-west-2#/dashboard/open-issues",
"uid": "0"
}
]
5 changes: 5 additions & 0 deletions tests/unit/data/aws/aws3_subject_parser_result.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
[
{
"account": ""
}
]
35 changes: 35 additions & 0 deletions tests/unit/data/aws/aws3_text_parser_result.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
[
{
"circuits": [
{
"circuit_id": "aaaaa-00000001",
"impact": "OUTAGE"
},
{
"circuit_id": "aaaaa-00000002",
"impact": "OUTAGE"
},
{
"circuit_id": "aaaaa-00000003",
"impact": "OUTAGE"
},
{
"circuit_id": "aaaaa-00000004",
"impact": "OUTAGE"
},
{
"circuit_id": "aaaaa-00000005",
"impact": "OUTAGE"
},
{
"circuit_id": "aaaaa-00000006",
"impact": "OUTAGE"
}
],
"end": 1631584920,
"maintenance_id": "47876b7d5a5198643a1a9cb7f954487a",
"start": 1631559720,
"status": "CANCELLED",
"summary": "We would like to inform you that the planned maintenance that was scheduled for AWS Direct Connect endpoint in Equinix SG2, Singapore, SGP from Mon, 13 Sep 2021 19:02:00 GMT to Tue, 14 Sep 2021 02:02:00 GMT has been cancelled. Please find below your AWS Direct Connect services that would have been affected by this planned maintenance."
}
]
9 changes: 9 additions & 0 deletions tests/unit/test_e2e.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,15 @@
Path(dir_path, "data", "aws", "aws2_result.json"),
],
),
(
AWS,
[
("email", Path(dir_path, "data", "aws", "aws3.eml")),
],
[
Path(dir_path, "data", "aws", "aws3_result.json"),
],
),
# BSO
(
BSO,
Expand Down
10 changes: 10 additions & 0 deletions tests/unit/test_parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,16 @@
Path(dir_path, "data", "aws", "aws2.eml"),
Path(dir_path, "data", "aws", "aws2_subject_parser_result.json"),
),
(
TextParserAWS1,
Path(dir_path, "data", "aws", "aws3.eml"),
Path(dir_path, "data", "aws", "aws3_text_parser_result.json"),
),
(
SubjectParserAWS1,
Path(dir_path, "data", "aws", "aws3.eml"),
Path(dir_path, "data", "aws", "aws3_subject_parser_result.json"),
),
# BSO
(
HtmlParserBSO1,
Expand Down
Loading