diff --git a/.github/workflows/python-tests.yml b/.github/workflows/python-tests.yml
index 4c7d9439..d5946904 100644
--- a/.github/workflows/python-tests.yml
+++ b/.github/workflows/python-tests.yml
@@ -46,7 +46,7 @@ jobs:
make html
- name: Check code style
run: |
- ruff check *.py parsedmarc/*.py
+ ruff check .
- name: Run unit tests
run: |
coverage run tests.py
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0022640d..1a1d6cd1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,15 @@
Changelog
=========
+8.15.1
+------
+
+- Proper IMAP namespace fix (Closes issue #557 and issue #563)
+ - Require `mailsuite>=1.9.17`
+ - Revert PR #552
+- Add pre-flight check for nameservers (PR #562 closes issue #543)
+- Reformat code with `ruff`
+
8.15.0
------
diff --git a/build.sh b/build.sh
index 658ff78e..ce37510d 100755
--- a/build.sh
+++ b/build.sh
@@ -8,7 +8,7 @@ fi
. venv/bin/activate
pip install -U -r requirements.txt
-flake8 parsedmarc
+ruff format .
cd docs
make clean
make html
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 8a3acd3b..a83821ac 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -12,15 +12,16 @@
#
import os
import sys
+
sys.path.insert(0, os.path.abspath(os.path.join("..", "..")))
from parsedmarc import __version__
# -- Project information -----------------------------------------------------
-project = 'parsedmarc'
-copyright = '2018 - 2023, Sean Whalen and contributors'
-author = 'Sean Whalen and contributors'
+project = "parsedmarc"
+copyright = "2018 - 2023, Sean Whalen and contributors"
+author = "Sean Whalen and contributors"
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
@@ -36,13 +37,15 @@
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
-extensions = ['sphinx.ext.autodoc',
- 'sphinx.ext.doctest',
- 'sphinx.ext.todo',
- 'sphinx.ext.viewcode',
- 'sphinx.ext.githubpages',
- 'sphinx.ext.napoleon',
- 'myst_parser']
+extensions = [
+ "sphinx.ext.autodoc",
+ "sphinx.ext.doctest",
+ "sphinx.ext.todo",
+ "sphinx.ext.viewcode",
+ "sphinx.ext.githubpages",
+ "sphinx.ext.napoleon",
+ "myst_parser",
+]
myst_enable_extensions = [
"amsmath",
@@ -64,7 +67,7 @@
autoclass_content = "init"
# Add any paths that contain templates here, relative to this directory.
-templates_path = ['_templates']
+templates_path = ["_templates"]
# The suffixes of source filenames.
@@ -81,13 +84,11 @@
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
-html_theme = 'sphinx_rtd_theme'
+html_theme = "sphinx_rtd_theme"
-html_theme_options = {
- 'globaltoc_collapse': False
-}
+html_theme_options = {"globaltoc_collapse": False}
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ['_static']
+html_static_path = ["_static"]
diff --git a/parsedmarc/__init__.py b/parsedmarc/__init__.py
index 4354c0ff..3fa334bd 100644
--- a/parsedmarc/__init__.py
+++ b/parsedmarc/__init__.py
@@ -34,7 +34,7 @@
from parsedmarc.utils import parse_email
from parsedmarc.utils import timestamp_to_human, human_timestamp_to_datetime
-__version__ = "8.15.0"
+__version__ = "8.15.1"
logger.debug("parsedmarc v{0}".format(__version__))
@@ -43,8 +43,8 @@
xml_schema_regex = re.compile(r"</??xs:schema.*>", re.MULTILINE)
text_report_regex = re.compile(r"\s*([a-zA-Z\s]+):\s(.+)", re.MULTILINE)
-MAGIC_ZIP = b"\x50\x4B\x03\x04"
-MAGIC_GZIP = b"\x1F\x8B"
+MAGIC_ZIP = b"\x50\x4b\x03\x04"
+MAGIC_GZIP = b"\x1f\x8b"
MAGIC_XML = b"\x3c\x3f\x78\x6d\x6c\x20"
MAGIC_JSON = b"\7b"
@@ -72,12 +72,16 @@ class InvalidForensicReport(InvalidDMARCReport):
"""Raised when an invalid DMARC forensic report is encountered"""
-def _parse_report_record(record, ip_db_path=None,
- always_use_local_files=False,
- reverse_dns_map_path=None,
- reverse_dns_map_url=None,
- offline=False,
- nameservers=None, dns_timeout=2.0):
+def _parse_report_record(
+ record,
+ ip_db_path=None,
+ always_use_local_files=False,
+ reverse_dns_map_path=None,
+ reverse_dns_map_url=None,
+ offline=False,
+ nameservers=None,
+ dns_timeout=2.0,
+):
"""
Converts a record from a DMARC aggregate report into a more consistent
format
@@ -110,15 +114,19 @@ def _parse_report_record(record, ip_db_path=None,
reverse_dns_map=REVERSE_DNS_MAP,
offline=offline,
nameservers=nameservers,
- timeout=dns_timeout)
+ timeout=dns_timeout,
+ )
new_record["source"] = new_record_source
new_record["count"] = int(record["row"]["count"])
policy_evaluated = record["row"]["policy_evaluated"].copy()
- new_policy_evaluated = OrderedDict([("disposition", "none"),
- ("dkim", "fail"),
- ("spf", "fail"),
- ("policy_override_reasons", [])
- ])
+ new_policy_evaluated = OrderedDict(
+ [
+ ("disposition", "none"),
+ ("dkim", "fail"),
+ ("spf", "fail"),
+ ("policy_override_reasons", []),
+ ]
+ )
if "disposition" in policy_evaluated:
new_policy_evaluated["disposition"] = policy_evaluated["disposition"]
if new_policy_evaluated["disposition"].strip().lower() == "pass":
@@ -128,10 +136,14 @@ def _parse_report_record(record, ip_db_path=None,
if "spf" in policy_evaluated:
new_policy_evaluated["spf"] = policy_evaluated["spf"]
reasons = []
- spf_aligned = policy_evaluated["spf"] is not None and policy_evaluated[
- "spf"].lower() == "pass"
- dkim_aligned = policy_evaluated["dkim"] is not None and policy_evaluated[
- "dkim"].lower() == "pass"
+ spf_aligned = (
+ policy_evaluated["spf"] is not None
+ and policy_evaluated["spf"].lower() == "pass"
+ )
+ dkim_aligned = (
+ policy_evaluated["dkim"] is not None
+ and policy_evaluated["dkim"].lower() == "pass"
+ )
dmarc_aligned = spf_aligned or dkim_aligned
new_record["alignment"] = dict()
new_record["alignment"]["spf"] = spf_aligned
@@ -155,7 +167,7 @@ def _parse_report_record(record, ip_db_path=None,
if type(new_record["identifiers"]["header_from"]) is str:
lowered_from = new_record["identifiers"]["header_from"].lower()
else:
- lowered_from = ''
+ lowered_from = ""
new_record["identifiers"]["header_from"] = lowered_from
if record["auth_results"] is not None:
auth_results = record["auth_results"].copy()
@@ -231,29 +243,30 @@ def _parse_smtp_tls_failure_details(failure_details):
)
if "sending-mta-ip" in failure_details:
- new_failure_details["sending_mta_ip"] = failure_details[
- "sending-mta-ip"]
+ new_failure_details["sending_mta_ip"] = failure_details["sending-mta-ip"]
if "receiving-ip" in failure_details:
- new_failure_details["receiving_ip"] = failure_details[
- "receiving-ip"]
+ new_failure_details["receiving_ip"] = failure_details["receiving-ip"]
if "receiving-mx-hostname" in failure_details:
new_failure_details["receiving_mx_hostname"] = failure_details[
- "receiving-mx-hostname"]
+ "receiving-mx-hostname"
+ ]
if "receiving-mx-helo" in failure_details:
new_failure_details["receiving_mx_helo"] = failure_details[
- "receiving-mx-helo"]
+ "receiving-mx-helo"
+ ]
if "additional-info-uri" in failure_details:
new_failure_details["additional_info_uri"] = failure_details[
- "additional-info-uri"]
+ "additional-info-uri"
+ ]
if "failure-reason-code" in failure_details:
new_failure_details["failure_reason_code"] = failure_details[
- "failure-reason-code"]
+ "failure-reason-code"
+ ]
return new_failure_details
except KeyError as e:
- raise InvalidSMTPTLSReport(f"Missing required failure details field:"
- f" {e}")
+ raise InvalidSMTPTLSReport(f"Missing required failure details field:" f" {e}")
except Exception as e:
raise InvalidSMTPTLSReport(str(e))
@@ -265,29 +278,26 @@ def _parse_smtp_tls_report_policy(policy):
policy_type = policy["policy"]["policy-type"]
failure_details = []
if policy_type not in policy_types:
- raise InvalidSMTPTLSReport(f"Invalid policy type "
- f"{policy_type}")
- new_policy = OrderedDict(policy_domain=policy_domain,
- policy_type=policy_type)
+ raise InvalidSMTPTLSReport(f"Invalid policy type " f"{policy_type}")
+ new_policy = OrderedDict(policy_domain=policy_domain, policy_type=policy_type)
if "policy-string" in policy["policy"]:
if isinstance(policy["policy"]["policy-string"], list):
if len(policy["policy"]["policy-string"]) > 0:
- new_policy["policy_strings"] = policy["policy"][
- "policy-string"]
+ new_policy["policy_strings"] = policy["policy"]["policy-string"]
if "mx-host-pattern" in policy["policy"]:
if isinstance(policy["policy"]["mx-host-pattern"], list):
if len(policy["policy"]["mx-host-pattern"]) > 0:
- new_policy["mx_host_patterns"] = policy["policy"][
- "mx-host-pattern"]
+ new_policy["mx_host_patterns"] = policy["policy"]["mx-host-pattern"]
new_policy["successful_session_count"] = policy["summary"][
- "total-successful-session-count"]
+ "total-successful-session-count"
+ ]
new_policy["failed_session_count"] = policy["summary"][
- "total-failure-session-count"]
+ "total-failure-session-count"
+ ]
if "failure-details" in policy:
for details in policy["failure-details"]:
- failure_details.append(_parse_smtp_tls_failure_details(
- details))
+ failure_details.append(_parse_smtp_tls_failure_details(details))
new_policy["failure_details"] = failure_details
return new_policy
@@ -300,9 +310,13 @@ def _parse_smtp_tls_report_policy(policy):
def parse_smtp_tls_report_json(report):
"""Parses and validates an SMTP TLS report"""
- required_fields = ["organization-name", "date-range",
- "contact-info", "report-id",
- "policies"]
+ required_fields = [
+ "organization-name",
+ "date-range",
+ "contact-info",
+ "report-id",
+ "policies",
+ ]
try:
policies = []
@@ -312,8 +326,9 @@ def parse_smtp_tls_report_json(report):
raise Exception(f"Missing required field: {required_field}]")
if not isinstance(report["policies"], list):
policies_type = type(report["policies"])
- raise InvalidSMTPTLSReport(f"policies must be a list, "
- f"not {policies_type}")
+ raise InvalidSMTPTLSReport(
+ f"policies must be a list, " f"not {policies_type}"
+ )
for policy in report["policies"]:
policies.append(_parse_smtp_tls_report_policy(policy))
@@ -323,7 +338,7 @@ def parse_smtp_tls_report_json(report):
end_date=report["date-range"]["end-datetime"],
contact_info=report["contact-info"],
report_id=report["report-id"],
- policies=policies
+ policies=policies,
)
return new_report
@@ -346,18 +361,18 @@ def parsed_smtp_tls_reports_to_csv_rows(reports):
organization_name=report["organization_name"],
begin_date=report["begin_date"],
end_date=report["end_date"],
- report_id=report["report_id"]
+ report_id=report["report_id"],
)
record = common_fields.copy()
for policy in report["policies"]:
if "policy_strings" in policy:
record["policy_strings"] = "|".join(policy["policy_strings"])
if "mx_host_patterns" in policy:
- record["mx_host_patterns"] = "|".join(
- policy["mx_host_patterns"])
+ record["mx_host_patterns"] = "|".join(policy["mx_host_patterns"])
successful_record = record.copy()
successful_record["successful_session_count"] = policy[
- "successful_session_count"]
+ "successful_session_count"
+ ]
rows.append(successful_record)
if "failure_details" in policy:
for failure_details in policy["failure_details"]:
@@ -381,12 +396,25 @@ def parsed_smtp_tls_reports_to_csv(reports):
str: Parsed aggregate report data in flat CSV format, including headers
"""
- fields = ["organization_name", "begin_date", "end_date", "report_id",
- "result_type", "successful_session_count",
- "failed_session_count", "policy_domain", "policy_type",
- "policy_strings", "mx_host_patterns", "sending_mta_ip",
- "receiving_ip", "receiving_mx_hostname", "receiving_mx_helo",
- "additional_info_uri", "failure_reason_code"]
+ fields = [
+ "organization_name",
+ "begin_date",
+ "end_date",
+ "report_id",
+ "result_type",
+ "successful_session_count",
+ "failed_session_count",
+ "policy_domain",
+ "policy_type",
+ "policy_strings",
+ "mx_host_patterns",
+ "sending_mta_ip",
+ "receiving_ip",
+ "receiving_mx_hostname",
+ "receiving_mx_helo",
+ "additional_info_uri",
+ "failure_reason_code",
+ ]
csv_file_object = StringIO(newline="\n")
writer = DictWriter(csv_file_object, fields)
@@ -402,15 +430,16 @@ def parsed_smtp_tls_reports_to_csv(reports):
def parse_aggregate_report_xml(
- xml,
- ip_db_path=None,
- always_use_local_files=False,
- reverse_dns_map_path=None,
- reverse_dns_map_url=None,
- offline=False,
- nameservers=None,
- timeout=2.0,
- keep_alive=None):
+ xml,
+ ip_db_path=None,
+ always_use_local_files=False,
+ reverse_dns_map_path=None,
+ reverse_dns_map_url=None,
+ offline=False,
+ nameservers=None,
+ timeout=2.0,
+ keep_alive=None,
+):
"""Parses a DMARC XML report string and returns a consistent OrderedDict
Args:
@@ -431,26 +460,27 @@ def parse_aggregate_report_xml(
errors = []
# Parse XML and recover from errors
if isinstance(xml, bytes):
- xml = xml.decode(errors='ignore')
+ xml = xml.decode(errors="ignore")
try:
xmltodict.parse(xml)["feedback"]
except Exception as e:
errors.append("Invalid XML: {0}".format(e.__str__()))
try:
tree = etree.parse(
- BytesIO(xml.encode('utf-8')),
- etree.XMLParser(recover=True, resolve_entities=False))
+ BytesIO(xml.encode("utf-8")),
+ etree.XMLParser(recover=True, resolve_entities=False),
+ )
s = etree.tostring(tree)
- xml = '' if s is None else s.decode('utf-8')
+ xml = "" if s is None else s.decode("utf-8")
except Exception:
- xml = u''
+ xml = ""
try:
# Replace XML header (sometimes they are invalid)
-    xml = xml_header_regex.sub('', xml)
+    xml = xml_header_regex.sub("", xml)
# Remove invalid schema tags
- xml = xml_schema_regex.sub('', xml)
+ xml = xml_schema_regex.sub("", xml)
report = xmltodict.parse(xml)["feedback"]
report_metadata = report["report_metadata"]
@@ -461,20 +491,21 @@ def parse_aggregate_report_xml(
new_report_metadata = OrderedDict()
if report_metadata["org_name"] is None:
if report_metadata["email"] is not None:
- report_metadata["org_name"] = report_metadata[
- "email"].split("@")[-1]
+ report_metadata["org_name"] = report_metadata["email"].split("@")[-1]
org_name = report_metadata["org_name"]
if org_name is not None and " " not in org_name:
new_org_name = get_base_domain(org_name)
if new_org_name is not None:
org_name = new_org_name
if not org_name:
- logger.debug("Could not parse org_name from XML.\r\n{0}".format(
- report.__str__()
- ))
- raise KeyError("Organization name is missing. \
+ logger.debug(
+ "Could not parse org_name from XML.\r\n{0}".format(report.__str__())
+ )
+ raise KeyError(
+ "Organization name is missing. \
This field is a requirement for \
- saving the report")
+ saving the report"
+ )
new_report_metadata["org_name"] = org_name
new_report_metadata["org_email"] = report_metadata["email"]
extra = None
@@ -483,11 +514,10 @@ def parse_aggregate_report_xml(
new_report_metadata["org_extra_contact_info"] = extra
new_report_metadata["report_id"] = report_metadata["report_id"]
report_id = new_report_metadata["report_id"]
- report_id = report_id.replace("<",
- "").replace(">", "").split("@")[0]
+ report_id = report_id.replace("<", "").replace(">", "").split("@")[0]
new_report_metadata["report_id"] = report_id
date_range = report["report_metadata"]["date_range"]
- if int(date_range["end"]) - int(date_range["begin"]) > 2*86400:
+ if int(date_range["end"]) - int(date_range["begin"]) > 2 * 86400:
_error = "Time span > 24 hours - RFC 7489 section 7.2"
errors.append(_error)
date_range["begin"] = timestamp_to_human(date_range["begin"])
@@ -540,8 +570,7 @@ def parse_aggregate_report_xml(
if keep_alive is not None and i > 0 and i % 20 == 0:
logger.debug("Sending keepalive cmd")
keep_alive()
- logger.debug("Processed {0}/{1}".format(
- i, len(report["record"])))
+ logger.debug("Processed {0}/{1}".format(i, len(report["record"])))
try:
report_record = _parse_report_record(
report["record"][i],
@@ -551,7 +580,8 @@ def parse_aggregate_report_xml(
reverse_dns_map_path=reverse_dns_map_path,
reverse_dns_map_url=reverse_dns_map_url,
nameservers=nameservers,
- dns_timeout=timeout)
+ dns_timeout=timeout,
+ )
records.append(report_record)
except Exception as e:
logger.warning("Could not parse record: {0}".format(e))
@@ -565,7 +595,8 @@ def parse_aggregate_report_xml(
reverse_dns_map_url=reverse_dns_map_url,
offline=offline,
nameservers=nameservers,
- dns_timeout=timeout)
+ dns_timeout=timeout,
+ )
records.append(report_record)
new_report["records"] = records
@@ -573,18 +604,15 @@ def parse_aggregate_report_xml(
return new_report
except expat.ExpatError as error:
- raise InvalidAggregateReport(
- "Invalid XML: {0}".format(error.__str__()))
+ raise InvalidAggregateReport("Invalid XML: {0}".format(error.__str__()))
except KeyError as error:
- raise InvalidAggregateReport(
- "Missing field: {0}".format(error.__str__()))
+ raise InvalidAggregateReport("Missing field: {0}".format(error.__str__()))
except AttributeError:
raise InvalidAggregateReport("Report missing required section")
except Exception as error:
- raise InvalidAggregateReport(
- "Unexpected error: {0}".format(error.__str__()))
+ raise InvalidAggregateReport("Unexpected error: {0}".format(error.__str__()))
def extract_report(content):
@@ -618,14 +646,13 @@ def extract_report(content):
file_object.seek(0)
if header.startswith(MAGIC_ZIP):
_zip = zipfile.ZipFile(file_object)
- report = _zip.open(_zip.namelist()[0]).read().decode(
- errors='ignore')
+ report = _zip.open(_zip.namelist()[0]).read().decode(errors="ignore")
elif header.startswith(MAGIC_GZIP):
- report = zlib.decompress(
- file_object.read(),
- zlib.MAX_WBITS | 16).decode(errors='ignore')
+ report = zlib.decompress(file_object.read(), zlib.MAX_WBITS | 16).decode(
+ errors="ignore"
+ )
elif header.startswith(MAGIC_XML) or header.startswith(MAGIC_JSON):
- report = file_object.read().decode(errors='ignore')
+ report = file_object.read().decode(errors="ignore")
else:
file_object.close()
raise ParserError("Not a valid zip, gzip, json, or xml file")
@@ -637,8 +664,7 @@ def extract_report(content):
raise ParserError("File objects must be opened in binary (rb) mode")
except Exception as error:
file_object.close()
- raise ParserError(
- "Invalid archive file: {0}".format(error.__str__()))
+ raise ParserError("Invalid archive file: {0}".format(error.__str__()))
return report
@@ -653,15 +679,16 @@ def extract_report_from_file_path(file_path):
def parse_aggregate_report_file(
- _input,
- offline=False,
- always_use_local_files=None,
- reverse_dns_map_path=None,
- reverse_dns_map_url=None,
- ip_db_path=None,
- nameservers=None,
- dns_timeout=2.0,
- keep_alive=None):
+ _input,
+ offline=False,
+ always_use_local_files=None,
+ reverse_dns_map_path=None,
+ reverse_dns_map_url=None,
+ ip_db_path=None,
+ nameservers=None,
+ dns_timeout=2.0,
+ keep_alive=None,
+):
"""Parses a file at the given path, a file-like object. or bytes as an
aggregate DMARC report
@@ -695,7 +722,8 @@ def parse_aggregate_report_file(
offline=offline,
nameservers=nameservers,
timeout=dns_timeout,
- keep_alive=keep_alive)
+ keep_alive=keep_alive,
+ )
def parsed_aggregate_reports_to_csv_rows(reports):
@@ -736,12 +764,23 @@ def to_str(obj):
pct = report["policy_published"]["pct"]
fo = report["policy_published"]["fo"]
- report_dict = dict(xml_schema=xml_schema, org_name=org_name,
- org_email=org_email,
- org_extra_contact_info=org_extra_contact,
- report_id=report_id, begin_date=begin_date,
- end_date=end_date, errors=errors, domain=domain,
- adkim=adkim, aspf=aspf, p=p, sp=sp, pct=pct, fo=fo)
+ report_dict = dict(
+ xml_schema=xml_schema,
+ org_name=org_name,
+ org_email=org_email,
+ org_extra_contact_info=org_extra_contact,
+ report_id=report_id,
+ begin_date=begin_date,
+ end_date=end_date,
+ errors=errors,
+ domain=domain,
+ adkim=adkim,
+ aspf=aspf,
+ p=p,
+ sp=sp,
+ pct=pct,
+ fo=fo,
+ )
for record in report["records"]:
row = report_dict.copy()
@@ -756,18 +795,20 @@ def to_str(obj):
row["dkim_aligned"] = record["alignment"]["dkim"]
row["dmarc_aligned"] = record["alignment"]["dmarc"]
row["disposition"] = record["policy_evaluated"]["disposition"]
- policy_override_reasons = list(map(
- lambda r_: r_["type"] or "none",
- record["policy_evaluated"]
- ["policy_override_reasons"]))
- policy_override_comments = list(map(
- lambda r_: r_["comment"] or "none",
- record["policy_evaluated"]
- ["policy_override_reasons"]))
- row["policy_override_reasons"] = ",".join(
- policy_override_reasons)
- row["policy_override_comments"] = "|".join(
- policy_override_comments)
+ policy_override_reasons = list(
+ map(
+ lambda r_: r_["type"] or "none",
+ record["policy_evaluated"]["policy_override_reasons"],
+ )
+ )
+ policy_override_comments = list(
+ map(
+ lambda r_: r_["comment"] or "none",
+ record["policy_evaluated"]["policy_override_reasons"],
+ )
+ )
+ row["policy_override_reasons"] = ",".join(policy_override_reasons)
+ row["policy_override_comments"] = "|".join(policy_override_comments)
row["envelope_from"] = record["identifiers"]["envelope_from"]
row["header_from"] = record["identifiers"]["header_from"]
envelope_to = record["identifiers"]["envelope_to"]
@@ -798,7 +839,7 @@ def to_str(obj):
for r in rows:
for k, v in r.items():
if type(v) not in [str, int, bool]:
- r[k] = ''
+ r[k] = ""
return rows
@@ -815,16 +856,45 @@ def parsed_aggregate_reports_to_csv(reports):
str: Parsed aggregate report data in flat CSV format, including headers
"""
- fields = ["xml_schema", "org_name", "org_email",
- "org_extra_contact_info", "report_id", "begin_date", "end_date",
- "errors", "domain", "adkim", "aspf", "p", "sp", "pct", "fo",
- "source_ip_address", "source_country", "source_reverse_dns",
- "source_base_domain", "source_name", "source_type", "count",
- "spf_aligned", "dkim_aligned", "dmarc_aligned", "disposition",
- "policy_override_reasons", "policy_override_comments",
- "envelope_from", "header_from",
- "envelope_to", "dkim_domains", "dkim_selectors", "dkim_results",
- "spf_domains", "spf_scopes", "spf_results"]
+ fields = [
+ "xml_schema",
+ "org_name",
+ "org_email",
+ "org_extra_contact_info",
+ "report_id",
+ "begin_date",
+ "end_date",
+ "errors",
+ "domain",
+ "adkim",
+ "aspf",
+ "p",
+ "sp",
+ "pct",
+ "fo",
+ "source_ip_address",
+ "source_country",
+ "source_reverse_dns",
+ "source_base_domain",
+ "source_name",
+ "source_type",
+ "count",
+ "spf_aligned",
+ "dkim_aligned",
+ "dmarc_aligned",
+ "disposition",
+ "policy_override_reasons",
+ "policy_override_comments",
+ "envelope_from",
+ "header_from",
+ "envelope_to",
+ "dkim_domains",
+ "dkim_selectors",
+ "dkim_results",
+ "spf_domains",
+ "spf_scopes",
+ "spf_results",
+ ]
csv_file_object = StringIO(newline="\n")
writer = DictWriter(csv_file_object, fields)
@@ -839,17 +909,19 @@ def parsed_aggregate_reports_to_csv(reports):
return csv_file_object.getvalue()
-def parse_forensic_report(feedback_report,
- sample,
- msg_date,
- always_use_local_files=False,
- reverse_dns_map_path=None,
- reverse_dns_map_url=None,
- offline=False,
- ip_db_path=None,
- nameservers=None,
- dns_timeout=2.0,
- strip_attachment_payloads=False):
+def parse_forensic_report(
+ feedback_report,
+ sample,
+ msg_date,
+ always_use_local_files=False,
+ reverse_dns_map_path=None,
+ reverse_dns_map_url=None,
+ offline=False,
+ ip_db_path=None,
+ nameservers=None,
+ dns_timeout=2.0,
+ strip_attachment_payloads=False,
+):
"""
    Converts a DMARC forensic report and sample to an ``OrderedDict``
@@ -882,8 +954,7 @@ def parse_forensic_report(feedback_report,
if "arrival_date" not in parsed_report:
if msg_date is None:
- raise InvalidForensicReport(
- "Forensic sample is not a valid email")
+ raise InvalidForensicReport("Forensic sample is not a valid email")
parsed_report["arrival_date"] = msg_date.isoformat()
if "version" not in parsed_report:
@@ -903,11 +974,12 @@ def parse_forensic_report(feedback_report,
parsed_report["delivery_result"] = "other"
arrival_utc = human_timestamp_to_datetime(
- parsed_report["arrival_date"], to_utc=True)
+ parsed_report["arrival_date"], to_utc=True
+ )
arrival_utc = arrival_utc.strftime("%Y-%m-%d %H:%M:%S")
parsed_report["arrival_date_utc"] = arrival_utc
- ip_address = re.split(r'\s', parsed_report["source_ip"]).pop(0)
+ ip_address = re.split(r"\s", parsed_report["source_ip"]).pop(0)
parsed_report_source = get_ip_address_info(
ip_address,
cache=IP_ADDRESS_CACHE,
@@ -918,7 +990,8 @@ def parse_forensic_report(feedback_report,
reverse_dns_map=REVERSE_DNS_MAP,
offline=offline,
nameservers=nameservers,
- timeout=dns_timeout)
+ timeout=dns_timeout,
+ )
parsed_report["source"] = parsed_report_source
del parsed_report["source_ip"]
@@ -938,15 +1011,19 @@ def parse_forensic_report(feedback_report,
auth_failure = parsed_report["auth_failure"].split(",")
parsed_report["auth_failure"] = auth_failure
- optional_fields = ["original_envelope_id", "dkim_domain",
- "original_mail_from", "original_rcpt_to"]
+ optional_fields = [
+ "original_envelope_id",
+ "dkim_domain",
+ "original_mail_from",
+ "original_rcpt_to",
+ ]
for optional_field in optional_fields:
if optional_field not in parsed_report:
parsed_report[optional_field] = None
parsed_sample = parse_email(
- sample,
- strip_attachment_payloads=strip_attachment_payloads)
+ sample, strip_attachment_payloads=strip_attachment_payloads
+ )
if "reported_domain" not in parsed_report:
parsed_report["reported_domain"] = parsed_sample["from"]["domain"]
@@ -966,12 +1043,10 @@ def parse_forensic_report(feedback_report,
return parsed_report
except KeyError as error:
- raise InvalidForensicReport("Missing value: {0}".format(
- error.__str__()))
+ raise InvalidForensicReport("Missing value: {0}".format(error.__str__()))
except Exception as error:
- raise InvalidForensicReport(
- "Unexpected error: {0}".format(error.__str__()))
+ raise InvalidForensicReport("Unexpected error: {0}".format(error.__str__()))
def parsed_forensic_reports_to_csv_rows(reports):
@@ -1002,8 +1077,7 @@ def parsed_forensic_reports_to_csv_rows(reports):
row["subject"] = report["parsed_sample"]["subject"]
row["auth_failure"] = ",".join(report["auth_failure"])
authentication_mechanisms = report["authentication_mechanisms"]
- row["authentication_mechanisms"] = ",".join(
- authentication_mechanisms)
+ row["authentication_mechanisms"] = ",".join(authentication_mechanisms)
del row["sample"]
del row["parsed_sample"]
rows.append(row)
@@ -1022,14 +1096,31 @@ def parsed_forensic_reports_to_csv(reports):
Returns:
str: Parsed forensic report data in flat CSV format, including headers
"""
- fields = ["feedback_type", "user_agent", "version", "original_envelope_id",
- "original_mail_from", "original_rcpt_to", "arrival_date",
- "arrival_date_utc", "subject", "message_id",
- "authentication_results", "dkim_domain", "source_ip_address",
- "source_country", "source_reverse_dns",
- "source_base_domain", "source_name", "source_type",
- "delivery_result", "auth_failure", "reported_domain",
- "authentication_mechanisms", "sample_headers_only"]
+ fields = [
+ "feedback_type",
+ "user_agent",
+ "version",
+ "original_envelope_id",
+ "original_mail_from",
+ "original_rcpt_to",
+ "arrival_date",
+ "arrival_date_utc",
+ "subject",
+ "message_id",
+ "authentication_results",
+ "dkim_domain",
+ "source_ip_address",
+ "source_country",
+ "source_reverse_dns",
+ "source_base_domain",
+ "source_name",
+ "source_type",
+ "delivery_result",
+ "auth_failure",
+ "reported_domain",
+ "authentication_mechanisms",
+ "sample_headers_only",
+ ]
csv_file = StringIO()
csv_writer = DictWriter(csv_file, fieldnames=fields)
@@ -1047,15 +1138,17 @@ def parsed_forensic_reports_to_csv(reports):
def parse_report_email(
- input_,
- offline=False,
- ip_db_path=None,
- always_use_local_files=False,
- reverse_dns_map_path=None,
- reverse_dns_map_url=None,
- nameservers=None, dns_timeout=2.0,
- strip_attachment_payloads=False,
- keep_alive=None):
+ input_,
+ offline=False,
+ ip_db_path=None,
+ always_use_local_files=False,
+ reverse_dns_map_path=None,
+ reverse_dns_map_url=None,
+ nameservers=None,
+ dns_timeout=2.0,
+ strip_attachment_payloads=False,
+ keep_alive=None,
+):
"""
Parses a DMARC report from an email
@@ -1088,8 +1181,7 @@ def parse_report_email(
msg_headers = json.loads(msg.headers_json)
date = email.utils.format_datetime(datetime.utcnow())
if "Date" in msg_headers:
- date = human_timestamp_to_datetime(
- msg_headers["Date"])
+ date = human_timestamp_to_datetime(msg_headers["Date"])
msg = email.message_from_string(input_)
except Exception as e:
@@ -1099,8 +1191,7 @@ def parse_report_email(
smtp_tls_report = None
sample = None
if "From" in msg_headers:
- logger.info("Parsing mail from {0} on {1}".format(msg_headers["From"],
- date))
+ logger.info("Parsing mail from {0} on {1}".format(msg_headers["From"], date))
if "Subject" in msg_headers:
subject = msg_headers["Subject"]
for part in msg.walk():
@@ -1115,8 +1206,7 @@ def parse_report_email(
feedback_report = payload
else:
feedback_report = b64decode(payload).__str__()
- feedback_report = feedback_report.lstrip(
- "b'").rstrip("'")
+ feedback_report = feedback_report.lstrip("b'").rstrip("'")
feedback_report = feedback_report.replace("\\r", "")
feedback_report = feedback_report.replace("\\n", "\n")
except (ValueError, TypeError, binascii.Error):
@@ -1130,13 +1220,15 @@ def parse_report_email(
if "{" not in payload:
payload = str(b64decode(payload))
smtp_tls_report = parse_smtp_tls_report_json(payload)
- return OrderedDict([("report_type", "smtp_tls"),
- ("report", smtp_tls_report)])
+ return OrderedDict(
+ [("report_type", "smtp_tls"), ("report", smtp_tls_report)]
+ )
elif content_type == "application/tlsrpt+gzip":
payload = extract_report(payload)
smtp_tls_report = parse_smtp_tls_report_json(payload)
- return OrderedDict([("report_type", "smtp_tls"),
- ("report", smtp_tls_report)])
+ return OrderedDict(
+ [("report_type", "smtp_tls"), ("report", smtp_tls_report)]
+ )
elif content_type == "text/plain":
if "A message claiming to be from you has failed" in payload:
@@ -1148,13 +1240,13 @@ def parse_report_email(
field_name = match[0].lower().replace(" ", "-")
fields[field_name] = match[1].strip()
- feedback_report = "Arrival-Date: {}\n" \
- "Source-IP: {}" \
- "".format(fields["received-date"],
- fields["sender-ip-address"])
+ feedback_report = "Arrival-Date: {}\n" "Source-IP: {}" "".format(
+ fields["received-date"], fields["sender-ip-address"]
+ )
except Exception as e:
- error = 'Unable to parse message with ' \
- 'subject "{0}": {1}'.format(subject, e)
+ error = "Unable to parse message with " 'subject "{0}": {1}'.format(
+ subject, e
+ )
raise InvalidDMARCReport(error)
sample = parts[1].lstrip()
@@ -1162,14 +1254,14 @@ def parse_report_email(
else:
try:
payload = b64decode(payload)
- if payload.startswith(MAGIC_ZIP) or \
- payload.startswith(MAGIC_GZIP):
+ if payload.startswith(MAGIC_ZIP) or payload.startswith(MAGIC_GZIP):
payload = extract_report(payload)
ns = nameservers
if payload.startswith("{"):
smtp_tls_report = parse_smtp_tls_report_json(payload)
- result = OrderedDict([("report_type", "smtp_tls"),
- ("report", smtp_tls_report)])
+ result = OrderedDict(
+ [("report_type", "smtp_tls"), ("report", smtp_tls_report)]
+ )
return result
aggregate_report = parse_aggregate_report_xml(
payload,
@@ -1180,23 +1272,28 @@ def parse_report_email(
offline=offline,
nameservers=ns,
timeout=dns_timeout,
- keep_alive=keep_alive)
- result = OrderedDict([("report_type", "aggregate"),
- ("report", aggregate_report)])
+ keep_alive=keep_alive,
+ )
+ result = OrderedDict(
+ [("report_type", "aggregate"), ("report", aggregate_report)]
+ )
return result
except (TypeError, ValueError, binascii.Error):
pass
except InvalidAggregateReport as e:
- error = 'Message with subject "{0}" ' \
- 'is not a valid ' \
- 'aggregate DMARC report: {1}'.format(subject, e)
+ error = (
+ 'Message with subject "{0}" '
+ "is not a valid "
+ "aggregate DMARC report: {1}".format(subject, e)
+ )
raise ParserError(error)
except Exception as e:
- error = 'Unable to parse message with ' \
- 'subject "{0}": {1}'.format(subject, e)
+ error = "Unable to parse message with " 'subject "{0}": {1}'.format(
+ subject, e
+ )
raise ParserError(error)
if feedback_report and sample:
@@ -1212,31 +1309,38 @@ def parse_report_email(
reverse_dns_map_url=reverse_dns_map_url,
nameservers=nameservers,
dns_timeout=dns_timeout,
- strip_attachment_payloads=strip_attachment_payloads)
+ strip_attachment_payloads=strip_attachment_payloads,
+ )
except InvalidForensicReport as e:
- error = 'Message with subject "{0}" ' \
- 'is not a valid ' \
- 'forensic DMARC report: {1}'.format(subject, e)
+ error = (
+ 'Message with subject "{0}" '
+ "is not a valid "
+ "forensic DMARC report: {1}".format(subject, e)
+ )
raise InvalidForensicReport(error)
except Exception as e:
raise InvalidForensicReport(e.__str__())
- result = OrderedDict([("report_type", "forensic"),
- ("report", forensic_report)])
+ result = OrderedDict([("report_type", "forensic"), ("report", forensic_report)])
return result
if result is None:
- error = 'Message with subject "{0}" is ' \
- 'not a valid report'.format(subject)
+ error = 'Message with subject "{0}" is ' "not a valid report".format(subject)
raise InvalidDMARCReport(error)
-def parse_report_file(input_, nameservers=None, dns_timeout=2.0,
- strip_attachment_payloads=False, ip_db_path=None,
- always_use_local_files=False,
- reverse_dns_map_path=None,
- reverse_dns_map_url=None,
- offline=False, keep_alive=None):
+def parse_report_file(
+ input_,
+ nameservers=None,
+ dns_timeout=2.0,
+ strip_attachment_payloads=False,
+ ip_db_path=None,
+ always_use_local_files=False,
+ reverse_dns_map_path=None,
+ reverse_dns_map_url=None,
+ offline=False,
+ keep_alive=None,
+):
"""Parses a DMARC aggregate or forensic file at the given path, a
    file-like object, or bytes
@@ -1277,14 +1381,13 @@ def parse_report_file(input_, nameservers=None, dns_timeout=2.0,
offline=offline,
nameservers=nameservers,
dns_timeout=dns_timeout,
- keep_alive=keep_alive)
- results = OrderedDict([("report_type", "aggregate"),
- ("report", report)])
+ keep_alive=keep_alive,
+ )
+ results = OrderedDict([("report_type", "aggregate"), ("report", report)])
except InvalidAggregateReport:
try:
report = parse_smtp_tls_report_json(content)
- results = OrderedDict([("report_type", "smtp_tls"),
- ("report", report)])
+ results = OrderedDict([("report_type", "smtp_tls"), ("report", report)])
except InvalidSMTPTLSReport:
try:
sa = strip_attachment_payloads
@@ -1298,19 +1401,24 @@ def parse_report_file(input_, nameservers=None, dns_timeout=2.0,
nameservers=nameservers,
dns_timeout=dns_timeout,
strip_attachment_payloads=sa,
- keep_alive=keep_alive)
+ keep_alive=keep_alive,
+ )
except InvalidDMARCReport:
raise ParserError("Not a valid report")
return results
-def get_dmarc_reports_from_mbox(input_, nameservers=None, dns_timeout=2.0,
- strip_attachment_payloads=False,
- ip_db_path=None,
- always_use_local_files=False,
- reverse_dns_map_path=None,
- reverse_dns_map_url=None,
- offline=False):
+def get_dmarc_reports_from_mbox(
+ input_,
+ nameservers=None,
+ dns_timeout=2.0,
+ strip_attachment_payloads=False,
+ ip_db_path=None,
+ always_use_local_files=False,
+ reverse_dns_map_path=None,
+ reverse_dns_map_url=None,
+ offline=False,
+):
"""Parses a mailbox in mbox format containing e-mails with attached
DMARC reports
@@ -1338,13 +1446,10 @@ def get_dmarc_reports_from_mbox(input_, nameservers=None, dns_timeout=2.0,
mbox = mailbox.mbox(input_)
message_keys = mbox.keys()
total_messages = len(message_keys)
- logger.debug("Found {0} messages in {1}".format(total_messages,
- input_))
+ logger.debug("Found {0} messages in {1}".format(total_messages, input_))
for i in range(len(message_keys)):
message_key = message_keys[i]
- logger.info("Processing message {0} of {1}".format(
- i+1, total_messages
- ))
+ logger.info("Processing message {0} of {1}".format(i + 1, total_messages))
msg_content = mbox.get_string(message_key)
try:
sa = strip_attachment_payloads
@@ -1357,7 +1462,8 @@ def get_dmarc_reports_from_mbox(input_, nameservers=None, dns_timeout=2.0,
offline=offline,
nameservers=nameservers,
dns_timeout=dns_timeout,
- strip_attachment_payloads=sa)
+ strip_attachment_payloads=sa,
+ )
if parsed_email["report_type"] == "aggregate":
aggregate_reports.append(parsed_email["report"])
elif parsed_email["report_type"] == "forensic":
@@ -1368,27 +1474,33 @@ def get_dmarc_reports_from_mbox(input_, nameservers=None, dns_timeout=2.0,
logger.warning(error.__str__())
except mailbox.NoSuchMailboxError:
raise InvalidDMARCReport("Mailbox {0} does not exist".format(input_))
- return OrderedDict([("aggregate_reports", aggregate_reports),
- ("forensic_reports", forensic_reports),
- ("smtp_tls_reports", smtp_tls_reports)])
-
-
-def get_dmarc_reports_from_mailbox(connection: MailboxConnection,
- reports_folder="INBOX",
- archive_folder="Archive",
- delete=False,
- test=False,
- ip_db_path=None,
- always_use_local_files=False,
- reverse_dns_map_path=None,
- reverse_dns_map_url=None,
- offline=False,
- nameservers=None,
- dns_timeout=6.0,
- strip_attachment_payloads=False,
- results=None,
- batch_size=10,
- create_folders=True):
+ return OrderedDict(
+ [
+ ("aggregate_reports", aggregate_reports),
+ ("forensic_reports", forensic_reports),
+ ("smtp_tls_reports", smtp_tls_reports),
+ ]
+ )
+
+
+def get_dmarc_reports_from_mailbox(
+ connection: MailboxConnection,
+ reports_folder="INBOX",
+ archive_folder="Archive",
+ delete=False,
+ test=False,
+ ip_db_path=None,
+ always_use_local_files=False,
+ reverse_dns_map_path=None,
+ reverse_dns_map_url=None,
+ offline=False,
+ nameservers=None,
+ dns_timeout=6.0,
+ strip_attachment_payloads=False,
+ results=None,
+ batch_size=10,
+ create_folders=True,
+):
"""
Fetches and parses DMARC reports from a mailbox
@@ -1428,15 +1540,10 @@ def get_dmarc_reports_from_mailbox(connection: MailboxConnection,
aggregate_report_msg_uids = []
forensic_report_msg_uids = []
smtp_tls_msg_uids = []
- folder_separator = connection.get_folder_separator()
- aggregate_reports_folder = "{0}{1}Aggregate".format(archive_folder,
- folder_separator)
- forensic_reports_folder = "{0}{1}Forensic".format(archive_folder,
- folder_separator)
- smtp_tls_reports_folder = "{0}{1}SMTP-TLS".format(archive_folder,
- folder_separator)
- invalid_reports_folder = "{0}{1}Invalid".format(archive_folder,
- folder_separator)
+ aggregate_reports_folder = "{0}/Aggregate".format(archive_folder)
+ forensic_reports_folder = "{0}/Forensic".format(archive_folder)
+ smtp_tls_reports_folder = "{0}/SMTP-TLS".format(archive_folder)
+ invalid_reports_folder = "{0}/Invalid".format(archive_folder)
if results:
aggregate_reports = results["aggregate_reports"].copy()
@@ -1452,8 +1559,7 @@ def get_dmarc_reports_from_mailbox(connection: MailboxConnection,
messages = connection.fetch_messages(reports_folder, batch_size=batch_size)
total_messages = len(messages)
- logger.debug("Found {0} messages in {1}".format(len(messages),
- reports_folder))
+ logger.debug("Found {0} messages in {1}".format(len(messages), reports_folder))
if batch_size:
message_limit = min(total_messages, batch_size)
@@ -1464,9 +1570,11 @@ def get_dmarc_reports_from_mailbox(connection: MailboxConnection,
for i in range(message_limit):
msg_uid = messages[i]
- logger.debug("Processing message {0} of {1}: UID {2}".format(
- i+1, message_limit, msg_uid
- ))
+ logger.debug(
+ "Processing message {0} of {1}: UID {2}".format(
+ i + 1, message_limit, msg_uid
+ )
+ )
msg_content = connection.fetch_message(msg_uid)
try:
sa = strip_attachment_payloads
@@ -1480,7 +1588,8 @@ def get_dmarc_reports_from_mailbox(connection: MailboxConnection,
reverse_dns_map_url=reverse_dns_map_url,
offline=offline,
strip_attachment_payloads=sa,
- keep_alive=connection.keepalive)
+ keep_alive=connection.keepalive,
+ )
if parsed_email["report_type"] == "aggregate":
aggregate_reports.append(parsed_email["report"])
aggregate_report_msg_uids.append(msg_uid)
@@ -1494,27 +1603,30 @@ def get_dmarc_reports_from_mailbox(connection: MailboxConnection,
logger.warning(error.__str__())
if not test:
if delete:
- logger.debug(
- "Deleting message UID {0}".format(msg_uid))
+ logger.debug("Deleting message UID {0}".format(msg_uid))
connection.delete_message(msg_uid)
else:
logger.debug(
"Moving message UID {0} to {1}".format(
- msg_uid, invalid_reports_folder))
+ msg_uid, invalid_reports_folder
+ )
+ )
connection.move_message(msg_uid, invalid_reports_folder)
if not test:
if delete:
- processed_messages = aggregate_report_msg_uids + \
- forensic_report_msg_uids + \
- smtp_tls_msg_uids
+ processed_messages = (
+ aggregate_report_msg_uids + forensic_report_msg_uids + smtp_tls_msg_uids
+ )
number_of_processed_msgs = len(processed_messages)
for i in range(number_of_processed_msgs):
msg_uid = processed_messages[i]
logger.debug(
"Deleting message {0} of {1}: UID {2}".format(
- i + 1, number_of_processed_msgs, msg_uid))
+ i + 1, number_of_processed_msgs, msg_uid
+ )
+ )
try:
connection.delete_message(msg_uid)
@@ -1527,17 +1639,19 @@ def get_dmarc_reports_from_mailbox(connection: MailboxConnection,
log_message = "Moving aggregate report messages from"
logger.debug(
"{0} {1} to {2}".format(
- log_message, reports_folder,
- aggregate_reports_folder))
+ log_message, reports_folder, aggregate_reports_folder
+ )
+ )
number_of_agg_report_msgs = len(aggregate_report_msg_uids)
for i in range(number_of_agg_report_msgs):
msg_uid = aggregate_report_msg_uids[i]
logger.debug(
"Moving message {0} of {1}: UID {2}".format(
- i+1, number_of_agg_report_msgs, msg_uid))
+ i + 1, number_of_agg_report_msgs, msg_uid
+ )
+ )
try:
- connection.move_message(msg_uid,
- aggregate_reports_folder)
+ connection.move_message(msg_uid, aggregate_reports_folder)
except Exception as e:
message = "Error moving message UID"
e = "{0} {1}: {2}".format(message, msg_uid, e)
@@ -1545,46 +1659,52 @@ def get_dmarc_reports_from_mailbox(connection: MailboxConnection,
if len(forensic_report_msg_uids) > 0:
message = "Moving forensic report messages from"
logger.debug(
- "{0} {1} to {2}".format(message,
- reports_folder,
- forensic_reports_folder))
+ "{0} {1} to {2}".format(
+ message, reports_folder, forensic_reports_folder
+ )
+ )
number_of_forensic_msgs = len(forensic_report_msg_uids)
for i in range(number_of_forensic_msgs):
msg_uid = forensic_report_msg_uids[i]
message = "Moving message"
- logger.debug("{0} {1} of {2}: UID {3}".format(
- message,
- i + 1, number_of_forensic_msgs, msg_uid))
+ logger.debug(
+ "{0} {1} of {2}: UID {3}".format(
+ message, i + 1, number_of_forensic_msgs, msg_uid
+ )
+ )
try:
- connection.move_message(msg_uid,
- forensic_reports_folder)
+ connection.move_message(msg_uid, forensic_reports_folder)
except Exception as e:
- e = "Error moving message UID {0}: {1}".format(
- msg_uid, e)
+ e = "Error moving message UID {0}: {1}".format(msg_uid, e)
logger.error("Mailbox error: {0}".format(e))
if len(smtp_tls_msg_uids) > 0:
message = "Moving SMTP TLS report messages from"
logger.debug(
- "{0} {1} to {2}".format(message,
- reports_folder,
- smtp_tls_reports_folder))
+ "{0} {1} to {2}".format(
+ message, reports_folder, smtp_tls_reports_folder
+ )
+ )
number_of_smtp_tls_uids = len(smtp_tls_msg_uids)
for i in range(number_of_smtp_tls_uids):
msg_uid = smtp_tls_msg_uids[i]
message = "Moving message"
- logger.debug("{0} {1} of {2}: UID {3}".format(
- message,
- i + 1, number_of_smtp_tls_uids, msg_uid))
+ logger.debug(
+ "{0} {1} of {2}: UID {3}".format(
+ message, i + 1, number_of_smtp_tls_uids, msg_uid
+ )
+ )
try:
- connection.move_message(msg_uid,
- smtp_tls_reports_folder)
+ connection.move_message(msg_uid, smtp_tls_reports_folder)
except Exception as e:
- e = "Error moving message UID {0}: {1}".format(
- msg_uid, e)
+ e = "Error moving message UID {0}: {1}".format(msg_uid, e)
logger.error("Mailbox error: {0}".format(e))
- results = OrderedDict([("aggregate_reports", aggregate_reports),
- ("forensic_reports", forensic_reports),
- ("smtp_tls_reports", smtp_tls_reports)])
+ results = OrderedDict(
+ [
+ ("aggregate_reports", aggregate_reports),
+ ("forensic_reports", forensic_reports),
+ ("smtp_tls_reports", smtp_tls_reports),
+ ]
+ )
total_messages = len(connection.fetch_messages(reports_folder))
@@ -1604,23 +1724,30 @@ def get_dmarc_reports_from_mailbox(connection: MailboxConnection,
always_use_local_files=always_use_local_files,
reverse_dns_map_path=reverse_dns_map_path,
reverse_dns_map_url=reverse_dns_map_url,
- offline=offline
+ offline=offline,
)
return results
-def watch_inbox(mailbox_connection: MailboxConnection,
- callback: Callable,
- reports_folder="INBOX",
- archive_folder="Archive", delete=False, test=False,
- check_timeout=30, ip_db_path=None,
- always_use_local_files=False,
- reverse_dns_map_path=None,
- reverse_dns_map_url=None,
- offline=False, nameservers=None,
- dns_timeout=6.0, strip_attachment_payloads=False,
- batch_size=None):
+def watch_inbox(
+ mailbox_connection: MailboxConnection,
+ callback: Callable,
+ reports_folder="INBOX",
+ archive_folder="Archive",
+ delete=False,
+ test=False,
+ check_timeout=30,
+ ip_db_path=None,
+ always_use_local_files=False,
+ reverse_dns_map_path=None,
+ reverse_dns_map_url=None,
+ offline=False,
+ nameservers=None,
+ dns_timeout=6.0,
+ strip_attachment_payloads=False,
+ batch_size=None,
+):
"""
Watches the mailbox for new messages and
sends the results to a callback function
@@ -1664,11 +1791,11 @@ def check_callback(connection):
dns_timeout=dns_timeout,
strip_attachment_payloads=sa,
batch_size=batch_size,
- create_folders=False)
+ create_folders=False,
+ )
callback(res)
- mailbox_connection.watch(check_callback=check_callback,
- check_timeout=check_timeout)
+ mailbox_connection.watch(check_callback=check_callback, check_timeout=check_timeout)
def append_json(filename, reports):
@@ -1706,13 +1833,16 @@ def append_csv(filename, csv):
output.write(csv)
-def save_output(results, output_directory="output",
- aggregate_json_filename="aggregate.json",
- forensic_json_filename="forensic.json",
- smtp_tls_json_filename="smtp_tls.json",
- aggregate_csv_filename="aggregate.csv",
- forensic_csv_filename="forensic.csv",
- smtp_tls_csv_filename="smtp_tls.csv"):
+def save_output(
+ results,
+ output_directory="output",
+ aggregate_json_filename="aggregate.json",
+ forensic_json_filename="forensic.json",
+ smtp_tls_json_filename="smtp_tls.json",
+ aggregate_csv_filename="aggregate.csv",
+ forensic_csv_filename="forensic.csv",
+ smtp_tls_csv_filename="smtp_tls.csv",
+):
"""
Save report data in the given directory
@@ -1738,23 +1868,32 @@ def save_output(results, output_directory="output",
else:
os.makedirs(output_directory)
- append_json(os.path.join(output_directory, aggregate_json_filename),
- aggregate_reports)
+ append_json(
+ os.path.join(output_directory, aggregate_json_filename), aggregate_reports
+ )
- append_csv(os.path.join(output_directory, aggregate_csv_filename),
- parsed_aggregate_reports_to_csv(aggregate_reports))
+ append_csv(
+ os.path.join(output_directory, aggregate_csv_filename),
+ parsed_aggregate_reports_to_csv(aggregate_reports),
+ )
- append_json(os.path.join(output_directory, forensic_json_filename),
- forensic_reports)
+ append_json(
+ os.path.join(output_directory, forensic_json_filename), forensic_reports
+ )
- append_csv(os.path.join(output_directory, forensic_csv_filename),
- parsed_forensic_reports_to_csv(forensic_reports))
+ append_csv(
+ os.path.join(output_directory, forensic_csv_filename),
+ parsed_forensic_reports_to_csv(forensic_reports),
+ )
- append_json(os.path.join(output_directory, smtp_tls_json_filename),
- smtp_tls_reports)
+ append_json(
+ os.path.join(output_directory, smtp_tls_json_filename), smtp_tls_reports
+ )
- append_csv(os.path.join(output_directory, smtp_tls_csv_filename),
- parsed_smtp_tls_reports_to_csv(smtp_tls_reports))
+ append_csv(
+ os.path.join(output_directory, smtp_tls_csv_filename),
+ parsed_smtp_tls_reports_to_csv(smtp_tls_reports),
+ )
samples_directory = os.path.join(output_directory, "samples")
if not os.path.exists(samples_directory):
@@ -1790,6 +1929,7 @@ def get_report_zip(results):
Returns:
bytes: zip file bytes
"""
+
def add_subdir(root_path, subdir):
subdir_path = os.path.join(root_path, subdir)
for subdir_root, subdir_dirs, subdir_files in os.walk(subdir_path):
@@ -1806,13 +1946,12 @@ def add_subdir(root_path, subdir):
tmp_dir = tempfile.mkdtemp()
try:
save_output(results, tmp_dir)
- with zipfile.ZipFile(storage, 'w', zipfile.ZIP_DEFLATED) as zip_file:
+ with zipfile.ZipFile(storage, "w", zipfile.ZIP_DEFLATED) as zip_file:
for root, dirs, files in os.walk(tmp_dir):
for file in files:
file_path = os.path.join(root, file)
if os.path.isfile(file_path):
- arcname = os.path.join(os.path.relpath(root, tmp_dir),
- file)
+ arcname = os.path.join(os.path.relpath(root, tmp_dir), file)
zip_file.write(file_path, arcname)
for directory in dirs:
dir_path = os.path.join(root, directory)
@@ -1825,11 +1964,22 @@ def add_subdir(root_path, subdir):
return storage.getvalue()
-def email_results(results, host, mail_from, mail_to,
- mail_cc=None, mail_bcc=None, port=0,
- require_encryption=False, verify=True,
- username=None, password=None, subject=None,
- attachment_filename=None, message=None):
+def email_results(
+ results,
+ host,
+ mail_from,
+ mail_to,
+ mail_cc=None,
+ mail_bcc=None,
+ port=0,
+ require_encryption=False,
+ verify=True,
+ username=None,
+ password=None,
+ subject=None,
+ attachment_filename=None,
+ message=None,
+):
"""
Emails parsing results as a zip file
@@ -1867,8 +2017,18 @@ def email_results(results, host, mail_from, mail_to,
zip_bytes = get_report_zip(results)
attachments = [(filename, zip_bytes)]
- send_email(host, mail_from, mail_to, message_cc=mail_cc,
- message_bcc=mail_bcc, port=port,
- require_encryption=require_encryption, verify=verify,
- username=username, password=password, subject=subject,
- attachments=attachments, plain_message=message)
+ send_email(
+ host,
+ mail_from,
+ mail_to,
+ message_cc=mail_cc,
+ message_bcc=mail_bcc,
+ port=port,
+ require_encryption=require_encryption,
+ verify=verify,
+ username=username,
+ password=password,
+ subject=subject,
+ attachments=attachments,
+ plain_message=message,
+ )
diff --git a/parsedmarc/cli.py b/parsedmarc/cli.py
index 05b4e0f9..f1f8ec42 100644
--- a/parsedmarc/cli.py
+++ b/parsedmarc/cli.py
@@ -16,21 +16,41 @@
import sys
from tqdm import tqdm
-from parsedmarc import get_dmarc_reports_from_mailbox, watch_inbox, \
- parse_report_file, get_dmarc_reports_from_mbox, elastic, opensearch, \
- kafkaclient, splunk, save_output, email_results, ParserError, \
- __version__, InvalidDMARCReport, s3, syslog, loganalytics, gelf, \
- webhook
-from parsedmarc.mail import IMAPConnection, MSGraphConnection, \
- GmailConnection, MaildirConnection
+from parsedmarc import (
+ get_dmarc_reports_from_mailbox,
+ watch_inbox,
+ parse_report_file,
+ get_dmarc_reports_from_mbox,
+ elastic,
+ opensearch,
+ kafkaclient,
+ splunk,
+ save_output,
+ email_results,
+ ParserError,
+ __version__,
+ InvalidDMARCReport,
+ s3,
+ syslog,
+ loganalytics,
+ gelf,
+ webhook,
+)
+from parsedmarc.mail import (
+ IMAPConnection,
+ MSGraphConnection,
+ GmailConnection,
+ MaildirConnection,
+)
from parsedmarc.mail.graph import AuthMethod
from parsedmarc.log import logger
from parsedmarc.utils import is_mbox, get_reverse_dns
formatter = logging.Formatter(
- fmt='%(levelname)8s:%(filename)s:%(lineno)d:%(message)s',
- datefmt='%Y-%m-%d:%H:%M:%S')
+ fmt="%(levelname)8s:%(filename)s:%(lineno)d:%(message)s",
+ datefmt="%Y-%m-%d:%H:%M:%S",
+)
handler = logging.StreamHandler()
handler.setFormatter(formatter)
logger.addHandler(handler)
@@ -42,12 +62,18 @@ def _str_to_list(s):
return list(map(lambda i: i.lstrip(), _list))
-def cli_parse(file_path, sa, nameservers, dns_timeout,
- ip_db_path, offline,
- always_use_local_files,
- reverse_dns_map_path,
- reverse_dns_map_url,
- conn):
+def cli_parse(
+ file_path,
+ sa,
+ nameservers,
+ dns_timeout,
+ ip_db_path,
+ offline,
+ always_use_local_files,
+ reverse_dns_map_path,
+ reverse_dns_map_url,
+ conn,
+):
"""Separated this function for multiprocessing"""
try:
file_results = parse_report_file(
@@ -59,7 +85,8 @@ def cli_parse(file_path, sa, nameservers, dns_timeout,
reverse_dns_map_url=reverse_dns_map_url,
nameservers=nameservers,
dns_timeout=dns_timeout,
- strip_attachment_payloads=sa)
+ strip_attachment_payloads=sa,
+ )
conn.send([file_results, file_path])
except ParserError as error:
conn.send([error, file_path])
@@ -71,20 +98,21 @@ def _main():
"""Called when the module is executed"""
def process_reports(reports_):
- output_str = "{0}\n".format(json.dumps(reports_,
- ensure_ascii=False,
- indent=2))
+ output_str = "{0}\n".format(json.dumps(reports_, ensure_ascii=False, indent=2))
if not opts.silent:
print(output_str)
if opts.output:
- save_output(results, output_directory=opts.output,
- aggregate_json_filename=opts.aggregate_json_filename,
- forensic_json_filename=opts.forensic_json_filename,
- smtp_tls_json_filename=opts.smtp_tls_json_filename,
- aggregate_csv_filename=opts.aggregate_csv_filename,
- forensic_csv_filename=opts.forensic_csv_filename,
- smtp_tls_csv_filename=opts.smtp_tls_csv_filename)
+ save_output(
+ results,
+ output_directory=opts.output,
+ aggregate_json_filename=opts.aggregate_json_filename,
+ forensic_json_filename=opts.forensic_json_filename,
+ smtp_tls_json_filename=opts.smtp_tls_json_filename,
+ aggregate_csv_filename=opts.aggregate_csv_filename,
+ forensic_csv_filename=opts.forensic_csv_filename,
+ smtp_tls_csv_filename=opts.smtp_tls_csv_filename,
+ )
if opts.save_aggregate:
for report in reports_["aggregate_reports"]:
try:
@@ -97,16 +125,16 @@ def process_reports(reports_):
index_prefix=opts.elasticsearch_index_prefix,
monthly_indexes=opts.elasticsearch_monthly_indexes,
number_of_shards=shards,
- number_of_replicas=replicas
+ number_of_replicas=replicas,
)
except elastic.AlreadySaved as warning:
logger.warning(warning.__str__())
except elastic.ElasticsearchError as error_:
- logger.error("Elasticsearch Error: {0}".format(
- error_.__str__()))
+ logger.error("Elasticsearch Error: {0}".format(error_.__str__()))
except Exception as error_:
- logger.error("Elasticsearch exception error: {}".format(
- error_.__str__()))
+ logger.error(
+ "Elasticsearch exception error: {}".format(error_.__str__())
+ )
try:
if opts.opensearch_hosts:
@@ -118,24 +146,24 @@ def process_reports(reports_):
index_prefix=opts.opensearch_index_prefix,
monthly_indexes=opts.opensearch_monthly_indexes,
number_of_shards=shards,
- number_of_replicas=replicas
+ number_of_replicas=replicas,
)
except opensearch.AlreadySaved as warning:
logger.warning(warning.__str__())
except opensearch.OpenSearchError as error_:
- logger.error("OpenSearch Error: {0}".format(
- error_.__str__()))
+ logger.error("OpenSearch Error: {0}".format(error_.__str__()))
except Exception as error_:
- logger.error("OpenSearch exception error: {}".format(
- error_.__str__()))
+ logger.error(
+ "OpenSearch exception error: {}".format(error_.__str__())
+ )
try:
if opts.kafka_hosts:
kafka_client.save_aggregate_reports_to_kafka(
- report, kafka_aggregate_topic)
+ report, kafka_aggregate_topic
+ )
except Exception as error_:
- logger.error("Kafka Error: {0}".format(
- error_.__str__()))
+ logger.error("Kafka Error: {0}".format(error_.__str__()))
try:
if opts.s3_bucket:
@@ -158,12 +186,8 @@ def process_reports(reports_):
try:
if opts.webhook_aggregate_url:
webhook_client.save_aggregate_report_to_webhook(
- json.dumps(
- report,
- ensure_ascii=False,
- indent=2
- )
- )
+ json.dumps(report, ensure_ascii=False, indent=2)
+ )
except Exception as error_:
logger.error("Webhook Error: {0}".format(error_.__str__()))
@@ -171,8 +195,7 @@ def process_reports(reports_):
try:
aggregate_reports_ = reports_["aggregate_reports"]
if len(aggregate_reports_) > 0:
- hec_client.save_aggregate_reports_to_splunk(
- aggregate_reports_)
+ hec_client.save_aggregate_reports_to_splunk(aggregate_reports_)
except splunk.SplunkError as e:
logger.error("Splunk HEC error: {0}".format(e.__str__()))
@@ -188,12 +211,12 @@ def process_reports(reports_):
index_prefix=opts.elasticsearch_index_prefix,
monthly_indexes=opts.elasticsearch_monthly_indexes,
number_of_shards=shards,
- number_of_replicas=replicas)
+ number_of_replicas=replicas,
+ )
except elastic.AlreadySaved as warning:
logger.warning(warning.__str__())
except elastic.ElasticsearchError as error_:
- logger.error("Elasticsearch Error: {0}".format(
- error_.__str__()))
+ logger.error("Elasticsearch Error: {0}".format(error_.__str__()))
except InvalidDMARCReport as error_:
logger.error(error_.__str__())
@@ -207,22 +230,22 @@ def process_reports(reports_):
index_prefix=opts.opensearch_index_prefix,
monthly_indexes=opts.opensearch_monthly_indexes,
number_of_shards=shards,
- number_of_replicas=replicas)
+ number_of_replicas=replicas,
+ )
except opensearch.AlreadySaved as warning:
logger.warning(warning.__str__())
except opensearch.OpenSearchError as error_:
- logger.error("OpenSearch Error: {0}".format(
- error_.__str__()))
+ logger.error("OpenSearch Error: {0}".format(error_.__str__()))
except InvalidDMARCReport as error_:
logger.error(error_.__str__())
try:
if opts.kafka_hosts:
kafka_client.save_forensic_reports_to_kafka(
- report, kafka_forensic_topic)
+ report, kafka_forensic_topic
+ )
except Exception as error_:
- logger.error("Kafka Error: {0}".format(
- error_.__str__()))
+ logger.error("Kafka Error: {0}".format(error_.__str__()))
try:
if opts.s3_bucket:
@@ -245,10 +268,8 @@ def process_reports(reports_):
try:
if opts.webhook_forensic_url:
webhook_client.save_forensic_report_to_webhook(
- json.dumps(
- report,
- ensure_ascii=False,
- indent=2))
+ json.dumps(report, ensure_ascii=False, indent=2)
+ )
except Exception as error_:
logger.error("Webhook Error: {0}".format(error_.__str__()))
@@ -256,8 +277,7 @@ def process_reports(reports_):
try:
forensic_reports_ = reports_["forensic_reports"]
if len(forensic_reports_) > 0:
- hec_client.save_forensic_reports_to_splunk(
- forensic_reports_)
+ hec_client.save_forensic_reports_to_splunk(forensic_reports_)
except splunk.SplunkError as e:
logger.error("Splunk HEC error: {0}".format(e.__str__()))
@@ -273,12 +293,12 @@ def process_reports(reports_):
index_prefix=opts.elasticsearch_index_prefix,
monthly_indexes=opts.elasticsearch_monthly_indexes,
number_of_shards=shards,
- number_of_replicas=replicas)
+ number_of_replicas=replicas,
+ )
except elastic.AlreadySaved as warning:
logger.warning(warning.__str__())
except elastic.ElasticsearchError as error_:
- logger.error("Elasticsearch Error: {0}".format(
- error_.__str__()))
+ logger.error("Elasticsearch Error: {0}".format(error_.__str__()))
except InvalidDMARCReport as error_:
logger.error(error_.__str__())
@@ -292,22 +312,22 @@ def process_reports(reports_):
index_prefix=opts.opensearch_index_prefix,
monthly_indexes=opts.opensearch_monthly_indexes,
number_of_shards=shards,
- number_of_replicas=replicas)
+ number_of_replicas=replicas,
+ )
except opensearch.AlreadySaved as warning:
logger.warning(warning.__str__())
except opensearch.OpenSearchError as error_:
- logger.error("OpenSearch Error: {0}".format(
- error_.__str__()))
+ logger.error("OpenSearch Error: {0}".format(error_.__str__()))
except InvalidDMARCReport as error_:
logger.error(error_.__str__())
try:
if opts.kafka_hosts:
kafka_client.save_smtp_tls_reports_to_kafka(
- smtp_tls_reports, kafka_smtp_tls_topic)
+ smtp_tls_reports, kafka_smtp_tls_topic
+ )
except Exception as error_:
- logger.error("Kafka Error: {0}".format(
- error_.__str__()))
+ logger.error("Kafka Error: {0}".format(error_.__str__()))
try:
if opts.s3_bucket:
@@ -330,10 +350,8 @@ def process_reports(reports_):
try:
if opts.webhook_smtp_tls_url:
webhook_client.save_smtp_tls_report_to_webhook(
- json.dumps(
- report,
- ensure_ascii=False,
- indent=2))
+ json.dumps(report, ensure_ascii=False, indent=2)
+ )
except Exception as error_:
logger.error("Webhook Error: {0}".format(error_.__str__()))
@@ -341,8 +359,7 @@ def process_reports(reports_):
try:
smtp_tls_reports_ = reports_["smtp_tls_reports"]
if len(smtp_tls_reports_) > 0:
- hec_client.save_smtp_tls_reports_to_splunk(
- smtp_tls_reports_)
+ hec_client.save_smtp_tls_reports_to_splunk(smtp_tls_reports_)
except splunk.SplunkError as e:
logger.error("Splunk HEC error: {0}".format(e.__str__()))
@@ -356,76 +373,105 @@ def process_reports(reports_):
dcr_immutable_id=opts.la_dcr_immutable_id,
dcr_aggregate_stream=opts.la_dcr_aggregate_stream,
dcr_forensic_stream=opts.la_dcr_forensic_stream,
- dcr_smtp_tls_stream=opts.la_dcr_smtp_tls_stream
+ dcr_smtp_tls_stream=opts.la_dcr_smtp_tls_stream,
)
la_client.publish_results(
reports_,
opts.save_aggregate,
opts.save_forensic,
- opts.save_smtp_tls)
+ opts.save_smtp_tls,
+ )
except loganalytics.LogAnalyticsException as e:
- logger.error(
- "Log Analytics error: {0}".format(e.__str__()))
+ logger.error("Log Analytics error: {0}".format(e.__str__()))
except Exception as e:
logger.error(
- "Unknown error occurred" +
- " during the publishing" +
- " to Log Analytics: " +
- e.__str__())
+ "Unknown error occurred"
+ + " during the publishing"
+ + " to Log Analytics: "
+ + e.__str__()
+ )
arg_parser = ArgumentParser(description="Parses DMARC reports")
- arg_parser.add_argument("-c", "--config-file",
- help="a path to a configuration file "
- "(--silent implied)")
- arg_parser.add_argument("file_path", nargs="*",
- help="one or more paths to aggregate or forensic "
- "report files, emails, or mbox files'")
- strip_attachment_help = "remove attachment payloads from forensic " \
- "report output"
- arg_parser.add_argument("--strip-attachment-payloads",
- help=strip_attachment_help, action="store_true")
- arg_parser.add_argument("-o", "--output",
- help="write output files to the given directory")
- arg_parser.add_argument("--aggregate-json-filename",
- help="filename for the aggregate JSON output file",
- default="aggregate.json")
- arg_parser.add_argument("--forensic-json-filename",
- help="filename for the forensic JSON output file",
- default="forensic.json")
- arg_parser.add_argument("--smtp-tls-json-filename",
- help="filename for the SMTP TLS JSON output file",
- default="smtp_tls.json")
- arg_parser.add_argument("--aggregate-csv-filename",
- help="filename for the aggregate CSV output file",
- default="aggregate.csv")
- arg_parser.add_argument("--forensic-csv-filename",
- help="filename for the forensic CSV output file",
- default="forensic.csv")
- arg_parser.add_argument("--smtp-tls-csv-filename",
- help="filename for the SMTP TLS CSV output file",
- default="smtp_tls.csv")
- arg_parser.add_argument("-n", "--nameservers", nargs="+",
- help="nameservers to query")
- arg_parser.add_argument("-t", "--dns_timeout",
- help="number of seconds to wait for an answer "
- "from DNS (default: 2.0)",
- type=float,
- default=2.0)
- arg_parser.add_argument("--offline", action="store_true",
- help="do not make online queries for geolocation "
- " or DNS")
- arg_parser.add_argument("-s", "--silent", action="store_true",
- help="only print errors")
- arg_parser.add_argument("-w", "--warnings", action="store_true",
- help="print warnings in addition to errors")
- arg_parser.add_argument("--verbose", action="store_true",
- help="more verbose output")
- arg_parser.add_argument("--debug", action="store_true",
- help="print debugging information")
- arg_parser.add_argument("--log-file", default=None,
- help="output logging to a file")
- arg_parser.add_argument("-v", "--version", action="version",
- version=__version__)
+ arg_parser.add_argument(
+ "-c",
+ "--config-file",
+ help="a path to a configuration file " "(--silent implied)",
+ )
+ arg_parser.add_argument(
+ "file_path",
+ nargs="*",
+ help="one or more paths to aggregate or forensic "
+        "report files, emails, or mbox files",
+ )
+ strip_attachment_help = "remove attachment payloads from forensic " "report output"
+ arg_parser.add_argument(
+ "--strip-attachment-payloads", help=strip_attachment_help, action="store_true"
+ )
+ arg_parser.add_argument(
+ "-o", "--output", help="write output files to the given directory"
+ )
+ arg_parser.add_argument(
+ "--aggregate-json-filename",
+ help="filename for the aggregate JSON output file",
+ default="aggregate.json",
+ )
+ arg_parser.add_argument(
+ "--forensic-json-filename",
+ help="filename for the forensic JSON output file",
+ default="forensic.json",
+ )
+ arg_parser.add_argument(
+ "--smtp-tls-json-filename",
+ help="filename for the SMTP TLS JSON output file",
+ default="smtp_tls.json",
+ )
+ arg_parser.add_argument(
+ "--aggregate-csv-filename",
+ help="filename for the aggregate CSV output file",
+ default="aggregate.csv",
+ )
+ arg_parser.add_argument(
+ "--forensic-csv-filename",
+ help="filename for the forensic CSV output file",
+ default="forensic.csv",
+ )
+ arg_parser.add_argument(
+ "--smtp-tls-csv-filename",
+ help="filename for the SMTP TLS CSV output file",
+ default="smtp_tls.csv",
+ )
+ arg_parser.add_argument(
+ "-n", "--nameservers", nargs="+", help="nameservers to query"
+ )
+ arg_parser.add_argument(
+ "-t",
+ "--dns_timeout",
+ help="number of seconds to wait for an answer " "from DNS (default: 2.0)",
+ type=float,
+ default=2.0,
+ )
+ arg_parser.add_argument(
+ "--offline",
+ action="store_true",
+        help="do not make online queries for geolocation or DNS",
+ )
+ arg_parser.add_argument(
+ "-s", "--silent", action="store_true", help="only print errors"
+ )
+ arg_parser.add_argument(
+ "-w",
+ "--warnings",
+ action="store_true",
+ help="print warnings in addition to errors",
+ )
+ arg_parser.add_argument(
+ "--verbose", action="store_true", help="more verbose output"
+ )
+ arg_parser.add_argument(
+ "--debug", action="store_true", help="print debugging information"
+ )
+ arg_parser.add_argument("--log-file", default=None, help="output logging to a file")
+ arg_parser.add_argument("-v", "--version", action="version", version=__version__)
aggregate_reports = []
forensic_reports = []
@@ -433,136 +479,137 @@ def process_reports(reports_):
args = arg_parser.parse_args()
- default_gmail_api_scope = 'https://www.googleapis.com/auth/gmail.modify'
-
- opts = Namespace(file_path=args.file_path,
- config_file=args.config_file,
- offline=args.offline,
- strip_attachment_payloads=args.strip_attachment_payloads,
- output=args.output,
- aggregate_csv_filename=args.aggregate_csv_filename,
- aggregate_json_filename=args.aggregate_json_filename,
- forensic_csv_filename=args.forensic_csv_filename,
- forensic_json_filename=args.forensic_json_filename,
- smtp_tls_json_filename=args.smtp_tls_json_filename,
- smtp_tls_csv_filename=args.smtp_tls_csv_filename,
- nameservers=args.nameservers,
- dns_test_address='1.1.1.1',
- silent=args.silent,
- warnings=args.warnings,
- dns_timeout=args.dns_timeout,
- debug=args.debug,
- verbose=args.verbose,
- save_aggregate=False,
- save_forensic=False,
- save_smtp_tls=False,
- mailbox_reports_folder="INBOX",
- mailbox_archive_folder="Archive",
- mailbox_watch=False,
- mailbox_delete=False,
- mailbox_test=False,
- mailbox_batch_size=10,
- mailbox_check_timeout=30,
- imap_host=None,
- imap_skip_certificate_verification=False,
- imap_ssl=True,
- imap_port=993,
- imap_timeout=30,
- imap_max_retries=4,
- imap_user=None,
- imap_password=None,
- graph_auth_method=None,
- graph_user=None,
- graph_password=None,
- graph_client_id=None,
- graph_client_secret=None,
- graph_tenant_id=None,
- graph_mailbox=None,
- graph_allow_unencrypted_storage=False,
- hec=None,
- hec_token=None,
- hec_index=None,
- hec_skip_certificate_verification=False,
- elasticsearch_hosts=None,
- elasticsearch_timeout=60,
- elasticsearch_number_of_shards=1,
- elasticsearch_number_of_replicas=0,
- elasticsearch_index_suffix=None,
- elasticsearch_index_prefix=None,
- elasticsearch_ssl=True,
- elasticsearch_ssl_cert_path=None,
- elasticsearch_monthly_indexes=False,
- elasticsearch_username=None,
- elasticsearch_password=None,
- elasticsearch_apiKey=None,
- opensearch_hosts=None,
- opensearch_timeout=60,
- opensearch_number_of_shards=1,
- opensearch_number_of_replicas=0,
- opensearch_index_suffix=None,
- opensearch_index_prefix=None,
- opensearch_ssl=True,
- opensearch_ssl_cert_path=None,
- opensearch_monthly_indexes=False,
- opensearch_username=None,
- opensearch_password=None,
- opensearch_apiKey=None,
- kafka_hosts=None,
- kafka_username=None,
- kafka_password=None,
- kafka_aggregate_topic=None,
- kafka_forensic_topic=None,
- kafka_smtp_tls_topic=None,
- kafka_ssl=False,
- kafka_skip_certificate_verification=False,
- smtp_host=None,
- smtp_port=25,
- smtp_ssl=False,
- smtp_skip_certificate_verification=False,
- smtp_user=None,
- smtp_password=None,
- smtp_from=None,
- smtp_to=[],
- smtp_subject="parsedmarc report",
- smtp_message="Please see the attached DMARC results.",
- s3_bucket=None,
- s3_path=None,
- s3_region_name=None,
- s3_endpoint_url=None,
- s3_access_key_id=None,
- s3_secret_access_key=None,
- syslog_server=None,
- syslog_port=None,
- gmail_api_credentials_file=None,
- gmail_api_token_file=None,
- gmail_api_include_spam_trash=False,
- gmail_api_paginate_messages=True,
- gmail_api_scopes=[],
- gmail_api_oauth2_port=8080,
- maildir_path=None,
- maildir_create=False,
- log_file=args.log_file,
- n_procs=1,
- ip_db_path=None,
- always_use_local_files=False,
- reverse_dns_map_path=None,
- reverse_dns_map_url=None,
- la_client_id=None,
- la_client_secret=None,
- la_tenant_id=None,
- la_dce=None,
- la_dcr_immutable_id=None,
- la_dcr_aggregate_stream=None,
- la_dcr_forensic_stream=None,
- la_dcr_smtp_tls_stream=None,
- gelf_host=None,
- gelf_port=None,
- gelf_mode=None,
- webhook_aggregate_url=None,
- webhook_forensic_url=None,
- webhook_smtp_tls_url=None,
- webhook_timeout=60
- )
+ default_gmail_api_scope = "https://www.googleapis.com/auth/gmail.modify"
+
+ opts = Namespace(
+ file_path=args.file_path,
+ config_file=args.config_file,
+ offline=args.offline,
+ strip_attachment_payloads=args.strip_attachment_payloads,
+ output=args.output,
+ aggregate_csv_filename=args.aggregate_csv_filename,
+ aggregate_json_filename=args.aggregate_json_filename,
+ forensic_csv_filename=args.forensic_csv_filename,
+ forensic_json_filename=args.forensic_json_filename,
+ smtp_tls_json_filename=args.smtp_tls_json_filename,
+ smtp_tls_csv_filename=args.smtp_tls_csv_filename,
+ nameservers=args.nameservers,
+ dns_test_address="1.1.1.1",
+ silent=args.silent,
+ warnings=args.warnings,
+ dns_timeout=args.dns_timeout,
+ debug=args.debug,
+ verbose=args.verbose,
+ save_aggregate=False,
+ save_forensic=False,
+ save_smtp_tls=False,
+ mailbox_reports_folder="INBOX",
+ mailbox_archive_folder="Archive",
+ mailbox_watch=False,
+ mailbox_delete=False,
+ mailbox_test=False,
+ mailbox_batch_size=10,
+ mailbox_check_timeout=30,
+ imap_host=None,
+ imap_skip_certificate_verification=False,
+ imap_ssl=True,
+ imap_port=993,
+ imap_timeout=30,
+ imap_max_retries=4,
+ imap_user=None,
+ imap_password=None,
+ graph_auth_method=None,
+ graph_user=None,
+ graph_password=None,
+ graph_client_id=None,
+ graph_client_secret=None,
+ graph_tenant_id=None,
+ graph_mailbox=None,
+ graph_allow_unencrypted_storage=False,
+ hec=None,
+ hec_token=None,
+ hec_index=None,
+ hec_skip_certificate_verification=False,
+ elasticsearch_hosts=None,
+ elasticsearch_timeout=60,
+ elasticsearch_number_of_shards=1,
+ elasticsearch_number_of_replicas=0,
+ elasticsearch_index_suffix=None,
+ elasticsearch_index_prefix=None,
+ elasticsearch_ssl=True,
+ elasticsearch_ssl_cert_path=None,
+ elasticsearch_monthly_indexes=False,
+ elasticsearch_username=None,
+ elasticsearch_password=None,
+ elasticsearch_apiKey=None,
+ opensearch_hosts=None,
+ opensearch_timeout=60,
+ opensearch_number_of_shards=1,
+ opensearch_number_of_replicas=0,
+ opensearch_index_suffix=None,
+ opensearch_index_prefix=None,
+ opensearch_ssl=True,
+ opensearch_ssl_cert_path=None,
+ opensearch_monthly_indexes=False,
+ opensearch_username=None,
+ opensearch_password=None,
+ opensearch_apiKey=None,
+ kafka_hosts=None,
+ kafka_username=None,
+ kafka_password=None,
+ kafka_aggregate_topic=None,
+ kafka_forensic_topic=None,
+ kafka_smtp_tls_topic=None,
+ kafka_ssl=False,
+ kafka_skip_certificate_verification=False,
+ smtp_host=None,
+ smtp_port=25,
+ smtp_ssl=False,
+ smtp_skip_certificate_verification=False,
+ smtp_user=None,
+ smtp_password=None,
+ smtp_from=None,
+ smtp_to=[],
+ smtp_subject="parsedmarc report",
+ smtp_message="Please see the attached DMARC results.",
+ s3_bucket=None,
+ s3_path=None,
+ s3_region_name=None,
+ s3_endpoint_url=None,
+ s3_access_key_id=None,
+ s3_secret_access_key=None,
+ syslog_server=None,
+ syslog_port=None,
+ gmail_api_credentials_file=None,
+ gmail_api_token_file=None,
+ gmail_api_include_spam_trash=False,
+ gmail_api_paginate_messages=True,
+ gmail_api_scopes=[],
+ gmail_api_oauth2_port=8080,
+ maildir_path=None,
+ maildir_create=False,
+ log_file=args.log_file,
+ n_procs=1,
+ ip_db_path=None,
+ always_use_local_files=False,
+ reverse_dns_map_path=None,
+ reverse_dns_map_url=None,
+ la_client_id=None,
+ la_client_secret=None,
+ la_tenant_id=None,
+ la_dce=None,
+ la_dcr_immutable_id=None,
+ la_dcr_aggregate_stream=None,
+ la_dcr_forensic_stream=None,
+ la_dcr_smtp_tls_stream=None,
+ gelf_host=None,
+ gelf_port=None,
+ gelf_mode=None,
+ webhook_aggregate_url=None,
+ webhook_forensic_url=None,
+ webhook_smtp_tls_url=None,
+ webhook_timeout=60,
+ )
args = arg_parser.parse_args()
if args.config_file:
@@ -579,45 +626,44 @@ def process_reports(reports_):
opts.offline = general_config.getboolean("offline")
if "strip_attachment_payloads" in general_config:
opts.strip_attachment_payloads = general_config.getboolean(
- "strip_attachment_payloads")
+ "strip_attachment_payloads"
+ )
if "output" in general_config:
opts.output = general_config["output"]
if "aggregate_json_filename" in general_config:
- opts.aggregate_json_filename = general_config[
- "aggregate_json_filename"]
+ opts.aggregate_json_filename = general_config["aggregate_json_filename"]
if "forensic_json_filename" in general_config:
- opts.forensic_json_filename = general_config[
- "forensic_json_filename"]
+ opts.forensic_json_filename = general_config["forensic_json_filename"]
if "smtp_tls_json_filename" in general_config:
- opts.smtp_tls_json_filename = general_config[
- "smtp_tls_json_filename"]
+ opts.smtp_tls_json_filename = general_config["smtp_tls_json_filename"]
if "aggregate_csv_filename" in general_config:
- opts.aggregate_csv_filename = general_config[
- "aggregate_csv_filename"]
+ opts.aggregate_csv_filename = general_config["aggregate_csv_filename"]
if "forensic_csv_filename" in general_config:
- opts.forensic_csv_filename = general_config[
- "forensic_csv_filename"]
+ opts.forensic_csv_filename = general_config["forensic_csv_filename"]
if "smtp_tls_csv_filename" in general_config:
- opts.smtp_tls_csv_filename = general_config[
- "smtp_tls_csv_filename"]
+ opts.smtp_tls_csv_filename = general_config["smtp_tls_csv_filename"]
if "dns_timeout" in general_config:
opts.dns_timeout = general_config.getfloat("dns_timeout")
if "dns_test_address" in general_config:
- opts.dns_test_address=general_config["dns_test_address"]
+ opts.dns_test_address = general_config["dns_test_address"]
if "nameservers" in general_config:
opts.nameservers = _str_to_list(general_config["nameservers"])
# nameservers pre-flight check
- dummy_hostname=None
+ dummy_hostname = None
try:
- dummy_hostname=get_reverse_dns(opts.dns_test_address,
- nameservers=opts.nameservers,
- timeout=opts.dns_timeout)
- except Exception as ns_error:
+ dummy_hostname = get_reverse_dns(
+ opts.dns_test_address,
+ nameservers=opts.nameservers,
+ timeout=opts.dns_timeout,
+ )
+ except Exception as ns_error:
logger.critical("DNS pre-flight check failed: {}".format(ns_error))
exit(-1)
if not dummy_hostname:
- logger.critical("DNS pre-flight check failed: no PTR record for "
- "{} from {}".format(opts.dns_test_address,opts.nameservers))
+ logger.critical(
+ "DNS pre-flight check failed: no PTR record for "
+ "{} from {}".format(opts.dns_test_address, opts.nameservers)
+ )
exit(-1)
if "save_aggregate" in general_config:
opts.save_aggregate = general_config["save_aggregate"]
@@ -643,13 +689,12 @@ def process_reports(reports_):
opts.ip_db_path = None
if "always_use_local_files" in general_config:
opts.always_use_local_files = general_config.getboolean(
- "always_use_local_files")
+ "always_use_local_files"
+ )
if "reverse_dns_map_path" in general_config:
- opts.reverse_dns_map_path = general_config[
- "reverse_dns_path"]
+ opts.reverse_dns_map_path = general_config["reverse_dns_path"]
if "reverse_dns_map_url" in general_config:
- opts.reverse_dns_map_url = general_config[
- "reverse_dns_url"]
+ opts.reverse_dns_map_url = general_config["reverse_dns_url"]
if "mailbox" in config.sections():
mailbox_config = config["mailbox"]
@@ -668,20 +713,20 @@ def process_reports(reports_):
if "batch_size" in mailbox_config:
opts.mailbox_batch_size = mailbox_config.getint("batch_size")
if "check_timeout" in mailbox_config:
- opts.mailbox_check_timeout = mailbox_config.getint(
- "check_timeout")
+ opts.mailbox_check_timeout = mailbox_config.getint("check_timeout")
if "imap" in config.sections():
imap_config = config["imap"]
if "watch" in imap_config:
- logger.warning("Starting in 8.0.0, the watch option has been "
- "moved from the imap configuration section to "
- "the mailbox configuration section.")
+ logger.warning(
+ "Starting in 8.0.0, the watch option has been "
+ "moved from the imap configuration section to "
+ "the mailbox configuration section."
+ )
if "host" in imap_config:
opts.imap_host = imap_config["host"]
else:
- logger.error("host setting missing from the "
- "imap config section")
+ logger.error("host setting missing from the " "imap config section")
exit(-1)
if "port" in imap_config:
opts.imap_port = imap_config.getint("port")
@@ -692,65 +737,78 @@ def process_reports(reports_):
if "ssl" in imap_config:
opts.imap_ssl = imap_config.getboolean("ssl")
if "skip_certificate_verification" in imap_config:
- imap_verify = imap_config.getboolean(
- "skip_certificate_verification")
+ imap_verify = imap_config.getboolean("skip_certificate_verification")
opts.imap_skip_certificate_verification = imap_verify
if "user" in imap_config:
opts.imap_user = imap_config["user"]
else:
- logger.critical("user setting missing from the "
- "imap config section")
+ logger.critical("user setting missing from the " "imap config section")
exit(-1)
if "password" in imap_config:
opts.imap_password = imap_config["password"]
else:
- logger.critical("password setting missing from the "
- "imap config section")
+ logger.critical(
+ "password setting missing from the " "imap config section"
+ )
exit(-1)
if "reports_folder" in imap_config:
opts.mailbox_reports_folder = imap_config["reports_folder"]
- logger.warning("Use of the reports_folder option in the imap "
- "configuration section has been deprecated. "
- "Use this option in the mailbox configuration "
- "section instead.")
+ logger.warning(
+ "Use of the reports_folder option in the imap "
+ "configuration section has been deprecated. "
+ "Use this option in the mailbox configuration "
+ "section instead."
+ )
if "archive_folder" in imap_config:
opts.mailbox_archive_folder = imap_config["archive_folder"]
- logger.warning("Use of the archive_folder option in the imap "
- "configuration section has been deprecated. "
- "Use this option in the mailbox configuration "
- "section instead.")
+ logger.warning(
+ "Use of the archive_folder option in the imap "
+ "configuration section has been deprecated. "
+ "Use this option in the mailbox configuration "
+ "section instead."
+ )
if "watch" in imap_config:
opts.mailbox_watch = imap_config.getboolean("watch")
- logger.warning("Use of the watch option in the imap "
- "configuration section has been deprecated. "
- "Use this option in the mailbox configuration "
- "section instead.")
+ logger.warning(
+ "Use of the watch option in the imap "
+ "configuration section has been deprecated. "
+ "Use this option in the mailbox configuration "
+ "section instead."
+ )
if "delete" in imap_config:
- logger.warning("Use of the delete option in the imap "
- "configuration section has been deprecated. "
- "Use this option in the mailbox configuration "
- "section instead.")
+ logger.warning(
+ "Use of the delete option in the imap "
+ "configuration section has been deprecated. "
+ "Use this option in the mailbox configuration "
+ "section instead."
+ )
if "test" in imap_config:
opts.mailbox_test = imap_config.getboolean("test")
- logger.warning("Use of the test option in the imap "
- "configuration section has been deprecated. "
- "Use this option in the mailbox configuration "
- "section instead.")
+ logger.warning(
+ "Use of the test option in the imap "
+ "configuration section has been deprecated. "
+ "Use this option in the mailbox configuration "
+ "section instead."
+ )
if "batch_size" in imap_config:
opts.mailbox_batch_size = imap_config.getint("batch_size")
- logger.warning("Use of the batch_size option in the imap "
- "configuration section has been deprecated. "
- "Use this option in the mailbox configuration "
- "section instead.")
+ logger.warning(
+ "Use of the batch_size option in the imap "
+ "configuration section has been deprecated. "
+ "Use this option in the mailbox configuration "
+ "section instead."
+ )
if "msgraph" in config.sections():
graph_config = config["msgraph"]
opts.graph_token_file = graph_config.get("token_file", ".token")
if "auth_method" not in graph_config:
- logger.info("auth_method setting missing from the "
- "msgraph config section "
- "defaulting to UsernamePassword")
+ logger.info(
+ "auth_method setting missing from the "
+                "msgraph config section, "
+ "defaulting to UsernamePassword"
+ )
opts.graph_auth_method = AuthMethod.UsernamePassword.name
else:
opts.graph_auth_method = graph_config["auth_method"]
@@ -759,19 +817,23 @@ def process_reports(reports_):
if "user" in graph_config:
opts.graph_user = graph_config["user"]
else:
- logger.critical("user setting missing from the "
- "msgraph config section")
+ logger.critical(
+ "user setting missing from the " "msgraph config section"
+ )
exit(-1)
if "password" in graph_config:
opts.graph_password = graph_config["password"]
else:
- logger.critical("password setting missing from the "
- "msgraph config section")
+ logger.critical(
+ "password setting missing from the " "msgraph config section"
+ )
if "client_secret" in graph_config:
opts.graph_client_secret = graph_config["client_secret"]
else:
- logger.critical("client_secret setting missing from the "
- "msgraph config section")
+ logger.critical(
+ "client_secret setting missing from the "
+ "msgraph config section"
+ )
exit(-1)
if opts.graph_auth_method == AuthMethod.DeviceCode.name:
@@ -780,159 +842,154 @@ def process_reports(reports_):
if opts.graph_auth_method != AuthMethod.UsernamePassword.name:
if "tenant_id" in graph_config:
- opts.graph_tenant_id = graph_config['tenant_id']
+ opts.graph_tenant_id = graph_config["tenant_id"]
else:
- logger.critical("tenant_id setting missing from the "
- "msgraph config section")
+ logger.critical(
+ "tenant_id setting missing from the " "msgraph config section"
+ )
exit(-1)
if opts.graph_auth_method == AuthMethod.ClientSecret.name:
if "client_secret" in graph_config:
opts.graph_client_secret = graph_config["client_secret"]
else:
- logger.critical("client_secret setting missing from the "
- "msgraph config section")
+ logger.critical(
+ "client_secret setting missing from the "
+ "msgraph config section"
+ )
exit(-1)
if "client_id" in graph_config:
opts.graph_client_id = graph_config["client_id"]
else:
- logger.critical("client_id setting missing from the "
- "msgraph config section")
+ logger.critical(
+ "client_id setting missing from the " "msgraph config section"
+ )
exit(-1)
if "mailbox" in graph_config:
opts.graph_mailbox = graph_config["mailbox"]
elif opts.graph_auth_method != AuthMethod.UsernamePassword.name:
- logger.critical("mailbox setting missing from the "
- "msgraph config section")
+ logger.critical(
+ "mailbox setting missing from the " "msgraph config section"
+ )
exit(-1)
if "allow_unencrypted_storage" in graph_config:
opts.graph_allow_unencrypted_storage = graph_config.getboolean(
- "allow_unencrypted_storage")
+ "allow_unencrypted_storage"
+ )
if "elasticsearch" in config:
elasticsearch_config = config["elasticsearch"]
if "hosts" in elasticsearch_config:
- opts.elasticsearch_hosts = _str_to_list(elasticsearch_config[
- "hosts"])
+ opts.elasticsearch_hosts = _str_to_list(elasticsearch_config["hosts"])
else:
- logger.critical("hosts setting missing from the "
- "elasticsearch config section")
+ logger.critical(
+ "hosts setting missing from the " "elasticsearch config section"
+ )
exit(-1)
if "timeout" in elasticsearch_config:
timeout = elasticsearch_config.getfloat("timeout")
opts.elasticsearch_timeout = timeout
if "number_of_shards" in elasticsearch_config:
- number_of_shards = elasticsearch_config.getint(
- "number_of_shards")
+ number_of_shards = elasticsearch_config.getint("number_of_shards")
opts.elasticsearch_number_of_shards = number_of_shards
if "number_of_replicas" in elasticsearch_config:
number_of_replicas = elasticsearch_config.getint(
- "number_of_replicas")
+ "number_of_replicas"
+ )
opts.elasticsearch_number_of_replicas = number_of_replicas
if "index_suffix" in elasticsearch_config:
- opts.elasticsearch_index_suffix = elasticsearch_config[
- "index_suffix"]
+ opts.elasticsearch_index_suffix = elasticsearch_config["index_suffix"]
if "index_prefix" in elasticsearch_config:
- opts.elasticsearch_index_prefix = elasticsearch_config[
- "index_prefix"]
+ opts.elasticsearch_index_prefix = elasticsearch_config["index_prefix"]
if "monthly_indexes" in elasticsearch_config:
monthly = elasticsearch_config.getboolean("monthly_indexes")
opts.elasticsearch_monthly_indexes = monthly
if "ssl" in elasticsearch_config:
- opts.elasticsearch_ssl = elasticsearch_config.getboolean(
- "ssl")
+ opts.elasticsearch_ssl = elasticsearch_config.getboolean("ssl")
if "cert_path" in elasticsearch_config:
- opts.elasticsearch_ssl_cert_path = elasticsearch_config[
- "cert_path"]
+ opts.elasticsearch_ssl_cert_path = elasticsearch_config["cert_path"]
if "user" in elasticsearch_config:
- opts.elasticsearch_username = elasticsearch_config[
- "user"]
+ opts.elasticsearch_username = elasticsearch_config["user"]
if "password" in elasticsearch_config:
- opts.elasticsearch_password = elasticsearch_config[
- "password"]
+ opts.elasticsearch_password = elasticsearch_config["password"]
if "apiKey" in elasticsearch_config:
- opts.elasticsearch_apiKey = elasticsearch_config[
- "apiKey"]
+ opts.elasticsearch_apiKey = elasticsearch_config["apiKey"]
if "opensearch" in config:
opensearch_config = config["opensearch"]
if "hosts" in opensearch_config:
- opts.opensearch_hosts = _str_to_list(opensearch_config[
- "hosts"])
+ opts.opensearch_hosts = _str_to_list(opensearch_config["hosts"])
else:
- logger.critical("hosts setting missing from the "
- "opensearch config section")
+ logger.critical(
+ "hosts setting missing from the " "opensearch config section"
+ )
exit(-1)
if "timeout" in opensearch_config:
timeout = opensearch_config.getfloat("timeout")
opts.opensearch_timeout = timeout
if "number_of_shards" in opensearch_config:
- number_of_shards = opensearch_config.getint(
- "number_of_shards")
+ number_of_shards = opensearch_config.getint("number_of_shards")
opts.opensearch_number_of_shards = number_of_shards
if "number_of_replicas" in opensearch_config:
- number_of_replicas = opensearch_config.getint(
- "number_of_replicas")
+ number_of_replicas = opensearch_config.getint("number_of_replicas")
opts.opensearch_number_of_replicas = number_of_replicas
if "index_suffix" in opensearch_config:
- opts.opensearch_index_suffix = opensearch_config[
- "index_suffix"]
+ opts.opensearch_index_suffix = opensearch_config["index_suffix"]
if "index_prefix" in opensearch_config:
- opts.opensearch_index_prefix = opensearch_config[
- "index_prefix"]
+ opts.opensearch_index_prefix = opensearch_config["index_prefix"]
if "monthly_indexes" in opensearch_config:
monthly = opensearch_config.getboolean("monthly_indexes")
opts.opensearch_monthly_indexes = monthly
if "ssl" in opensearch_config:
- opts.opensearch_ssl = opensearch_config.getboolean(
- "ssl")
+ opts.opensearch_ssl = opensearch_config.getboolean("ssl")
if "cert_path" in opensearch_config:
- opts.opensearch_ssl_cert_path = opensearch_config[
- "cert_path"]
+ opts.opensearch_ssl_cert_path = opensearch_config["cert_path"]
if "user" in opensearch_config:
- opts.opensearch_username = opensearch_config[
- "user"]
+ opts.opensearch_username = opensearch_config["user"]
if "password" in opensearch_config:
- opts.opensearch_password = opensearch_config[
- "password"]
+ opts.opensearch_password = opensearch_config["password"]
if "apiKey" in opensearch_config:
- opts.opensearch_apiKey = opensearch_config[
- "apiKey"]
+ opts.opensearch_apiKey = opensearch_config["apiKey"]
if "splunk_hec" in config.sections():
hec_config = config["splunk_hec"]
if "url" in hec_config:
opts.hec = hec_config["url"]
else:
- logger.critical("url setting missing from the "
- "splunk_hec config section")
+ logger.critical(
+ "url setting missing from the " "splunk_hec config section"
+ )
exit(-1)
if "token" in hec_config:
opts.hec_token = hec_config["token"]
else:
- logger.critical("token setting missing from the "
- "splunk_hec config section")
+ logger.critical(
+ "token setting missing from the " "splunk_hec config section"
+ )
exit(-1)
if "index" in hec_config:
opts.hec_index = hec_config["index"]
else:
- logger.critical("index setting missing from the "
- "splunk_hec config section")
+ logger.critical(
+ "index setting missing from the " "splunk_hec config section"
+ )
exit(-1)
if "skip_certificate_verification" in hec_config:
opts.hec_skip_certificate_verification = hec_config[
- "skip_certificate_verification"]
+ "skip_certificate_verification"
+ ]
if "kafka" in config.sections():
kafka_config = config["kafka"]
if "hosts" in kafka_config:
opts.kafka_hosts = _str_to_list(kafka_config["hosts"])
else:
- logger.critical("hosts setting missing from the "
- "kafka config section")
+ logger.critical(
+ "hosts setting missing from the " "kafka config section"
+ )
exit(-1)
if "user" in kafka_config:
opts.kafka_username = kafka_config["user"]
@@ -941,64 +998,63 @@ def process_reports(reports_):
if "ssl" in kafka_config:
opts.kafka_ssl = kafka_config.getboolean("ssl")
if "skip_certificate_verification" in kafka_config:
- kafka_verify = kafka_config.getboolean(
- "skip_certificate_verification")
+ kafka_verify = kafka_config.getboolean("skip_certificate_verification")
opts.kafka_skip_certificate_verification = kafka_verify
if "aggregate_topic" in kafka_config:
opts.kafka_aggregate_topic = kafka_config["aggregate_topic"]
else:
- logger.critical("aggregate_topic setting missing from the "
- "kafka config section")
+ logger.critical(
+ "aggregate_topic setting missing from the " "kafka config section"
+ )
exit(-1)
if "forensic_topic" in kafka_config:
opts.kafka_forensic_topic = kafka_config["forensic_topic"]
else:
- logger.critical("forensic_topic setting missing from the "
- "kafka config section")
+ logger.critical(
+ "forensic_topic setting missing from the " "kafka config section"
+ )
if "smtp_tls_topic" in kafka_config:
opts.kafka_smtp_tls_topic = kafka_config["smtp_tls_topic"]
else:
- logger.critical("forensic_topic setting missing from the "
- "splunk_hec config section")
+ logger.critical(
+                "smtp_tls_topic setting missing from the "
+                "kafka config section"
+ )
if "smtp" in config.sections():
smtp_config = config["smtp"]
if "host" in smtp_config:
opts.smtp_host = smtp_config["host"]
else:
- logger.critical("host setting missing from the "
- "smtp config section")
+ logger.critical("host setting missing from the " "smtp config section")
exit(-1)
if "port" in smtp_config:
opts.smtp_port = smtp_config.getint("port")
if "ssl" in smtp_config:
opts.smtp_ssl = smtp_config.getboolean("ssl")
if "skip_certificate_verification" in smtp_config:
- smtp_verify = smtp_config.getboolean(
- "skip_certificate_verification")
+ smtp_verify = smtp_config.getboolean("skip_certificate_verification")
opts.smtp_skip_certificate_verification = smtp_verify
if "user" in smtp_config:
opts.smtp_user = smtp_config["user"]
else:
- logger.critical("user setting missing from the "
- "smtp config section")
+ logger.critical("user setting missing from the " "smtp config section")
exit(-1)
if "password" in smtp_config:
opts.smtp_password = smtp_config["password"]
else:
- logger.critical("password setting missing from the "
- "smtp config section")
+ logger.critical(
+ "password setting missing from the " "smtp config section"
+ )
exit(-1)
if "from" in smtp_config:
opts.smtp_from = smtp_config["from"]
else:
- logger.critical("from setting missing from the "
- "smtp config section")
+ logger.critical("from setting missing from the " "smtp config section")
if "to" in smtp_config:
opts.smtp_to = _str_to_list(smtp_config["to"])
else:
- logger.critical("to setting missing from the "
- "smtp config section")
+ logger.critical("to setting missing from the " "smtp config section")
if "subject" in smtp_config:
opts.smtp_subject = smtp_config["subject"]
if "attachment" in smtp_config:
@@ -1011,8 +1067,7 @@ def process_reports(reports_):
if "bucket" in s3_config:
opts.s3_bucket = s3_config["bucket"]
else:
- logger.critical("bucket setting missing from the "
- "s3 config section")
+ logger.critical("bucket setting missing from the " "s3 config section")
exit(-1)
if "path" in s3_config:
opts.s3_path = s3_config["path"]
@@ -1037,8 +1092,9 @@ def process_reports(reports_):
if "server" in syslog_config:
opts.syslog_server = syslog_config["server"]
else:
- logger.critical("server setting missing from the "
- "syslog config section")
+ logger.critical(
+ "server setting missing from the " "syslog config section"
+ )
exit(-1)
if "port" in syslog_config:
opts.syslog_port = syslog_config["port"]
@@ -1047,68 +1103,59 @@ def process_reports(reports_):
if "gmail_api" in config.sections():
gmail_api_config = config["gmail_api"]
- opts.gmail_api_credentials_file = \
- gmail_api_config.get("credentials_file")
- opts.gmail_api_token_file = \
- gmail_api_config.get("token_file", ".token")
- opts.gmail_api_include_spam_trash = \
- gmail_api_config.getboolean("include_spam_trash", False)
- opts.gmail_api_paginate_messages = \
- gmail_api_config.getboolean("paginate_messages", True)
- opts.gmail_api_scopes = \
- gmail_api_config.get("scopes",
- default_gmail_api_scope)
- opts.gmail_api_scopes = \
- _str_to_list(opts.gmail_api_scopes)
+ opts.gmail_api_credentials_file = gmail_api_config.get("credentials_file")
+ opts.gmail_api_token_file = gmail_api_config.get("token_file", ".token")
+ opts.gmail_api_include_spam_trash = gmail_api_config.getboolean(
+ "include_spam_trash", False
+ )
+ opts.gmail_api_paginate_messages = gmail_api_config.getboolean(
+ "paginate_messages", True
+ )
+ opts.gmail_api_scopes = gmail_api_config.get(
+ "scopes", default_gmail_api_scope
+ )
+ opts.gmail_api_scopes = _str_to_list(opts.gmail_api_scopes)
if "oauth2_port" in gmail_api_config:
- opts.gmail_api_oauth2_port = \
- gmail_api_config.get("oauth2_port", 8080)
+ opts.gmail_api_oauth2_port = gmail_api_config.get("oauth2_port", 8080)
if "maildir" in config.sections():
maildir_api_config = config["maildir"]
- opts.maildir_path = \
- maildir_api_config.get("maildir_path")
- opts.maildir_create = \
- maildir_api_config.get("maildir_create")
+ opts.maildir_path = maildir_api_config.get("maildir_path")
+ opts.maildir_create = maildir_api_config.get("maildir_create")
if "log_analytics" in config.sections():
log_analytics_config = config["log_analytics"]
- opts.la_client_id = \
- log_analytics_config.get("client_id")
- opts.la_client_secret = \
- log_analytics_config.get("client_secret")
- opts.la_tenant_id = \
- log_analytics_config.get("tenant_id")
- opts.la_dce = \
- log_analytics_config.get("dce")
- opts.la_dcr_immutable_id = \
- log_analytics_config.get("dcr_immutable_id")
- opts.la_dcr_aggregate_stream = \
- log_analytics_config.get("dcr_aggregate_stream")
- opts.la_dcr_forensic_stream = \
- log_analytics_config.get("dcr_forensic_stream")
- opts.la_dcr_smtp_tls_stream = \
- log_analytics_config.get("dcr_smtp_tls_stream")
+ opts.la_client_id = log_analytics_config.get("client_id")
+ opts.la_client_secret = log_analytics_config.get("client_secret")
+ opts.la_tenant_id = log_analytics_config.get("tenant_id")
+ opts.la_dce = log_analytics_config.get("dce")
+ opts.la_dcr_immutable_id = log_analytics_config.get("dcr_immutable_id")
+ opts.la_dcr_aggregate_stream = log_analytics_config.get(
+ "dcr_aggregate_stream"
+ )
+ opts.la_dcr_forensic_stream = log_analytics_config.get(
+ "dcr_forensic_stream"
+ )
+ opts.la_dcr_smtp_tls_stream = log_analytics_config.get(
+ "dcr_smtp_tls_stream"
+ )
if "gelf" in config.sections():
gelf_config = config["gelf"]
if "host" in gelf_config:
opts.gelf_host = gelf_config["host"]
else:
- logger.critical("host setting missing from the "
- "gelf config section")
+ logger.critical("host setting missing from the " "gelf config section")
exit(-1)
if "port" in gelf_config:
opts.gelf_port = gelf_config["port"]
else:
- logger.critical("port setting missing from the "
- "gelf config section")
+ logger.critical("port setting missing from the " "gelf config section")
exit(-1)
if "mode" in gelf_config:
opts.gelf_mode = gelf_config["mode"]
else:
- logger.critical("mode setting missing from the "
- "gelf config section")
+ logger.critical("mode setting missing from the " "gelf config section")
exit(-1)
if "webhook" in config.sections():
@@ -1136,18 +1183,21 @@ def process_reports(reports_):
log_file.close()
fh = logging.FileHandler(opts.log_file)
formatter = logging.Formatter(
- '%(asctime)s - '
- '%(levelname)s - [%(filename)s:%(lineno)d] - %(message)s')
+ "%(asctime)s - "
+ "%(levelname)s - [%(filename)s:%(lineno)d] - %(message)s"
+ )
fh.setFormatter(formatter)
logger.addHandler(fh)
except Exception as error:
logger.warning("Unable to write to log file: {}".format(error))
- if opts.imap_host is None \
- and opts.graph_client_id is None \
- and opts.gmail_api_credentials_file is None \
- and opts.maildir_path is None \
- and len(opts.file_path) == 0:
+ if (
+ opts.imap_host is None
+ and opts.graph_client_id is None
+ and opts.gmail_api_credentials_file is None
+ and opts.maildir_path is None
+ and len(opts.file_path) == 0
+ ):
logger.error("You must supply input files or a mailbox connection")
exit(1)
@@ -1161,31 +1211,27 @@ def process_reports(reports_):
es_smtp_tls_index = "smtp_tls"
if opts.elasticsearch_index_suffix:
suffix = opts.elasticsearch_index_suffix
- es_aggregate_index = "{0}_{1}".format(
- es_aggregate_index, suffix)
- es_forensic_index = "{0}_{1}".format(
- es_forensic_index, suffix)
- es_smtp_tls_index = "{0}_{1}".format(
- es_smtp_tls_index, suffix
- )
+ es_aggregate_index = "{0}_{1}".format(es_aggregate_index, suffix)
+ es_forensic_index = "{0}_{1}".format(es_forensic_index, suffix)
+ es_smtp_tls_index = "{0}_{1}".format(es_smtp_tls_index, suffix)
if opts.elasticsearch_index_prefix:
prefix = opts.elasticsearch_index_prefix
- es_aggregate_index = "{0}{1}".format(
- prefix, es_aggregate_index)
- es_forensic_index = "{0}{1}".format(
- prefix, es_forensic_index)
- es_smtp_tls_index = "{0}{1}".format(
- prefix, es_smtp_tls_index
- )
- elastic.set_hosts(opts.elasticsearch_hosts,
- opts.elasticsearch_ssl,
- opts.elasticsearch_ssl_cert_path,
- opts.elasticsearch_username,
- opts.elasticsearch_password,
- opts.elasticsearch_apiKey,
- timeout=opts.elasticsearch_timeout)
- elastic.migrate_indexes(aggregate_indexes=[es_aggregate_index],
- forensic_indexes=[es_forensic_index])
+ es_aggregate_index = "{0}{1}".format(prefix, es_aggregate_index)
+ es_forensic_index = "{0}{1}".format(prefix, es_forensic_index)
+ es_smtp_tls_index = "{0}{1}".format(prefix, es_smtp_tls_index)
+ elastic.set_hosts(
+ opts.elasticsearch_hosts,
+ opts.elasticsearch_ssl,
+ opts.elasticsearch_ssl_cert_path,
+ opts.elasticsearch_username,
+ opts.elasticsearch_password,
+ opts.elasticsearch_apiKey,
+ timeout=opts.elasticsearch_timeout,
+ )
+ elastic.migrate_indexes(
+ aggregate_indexes=[es_aggregate_index],
+ forensic_indexes=[es_forensic_index],
+ )
except elastic.ElasticsearchError:
logger.exception("Elasticsearch Error")
exit(1)
@@ -1197,32 +1243,27 @@ def process_reports(reports_):
os_smtp_tls_index = "smtp_tls"
if opts.opensearch_index_suffix:
suffix = opts.opensearch_index_suffix
- os_aggregate_index = "{0}_{1}".format(
- os_aggregate_index, suffix)
- os_forensic_index = "{0}_{1}".format(
- os_forensic_index, suffix)
- os_smtp_tls_index = "{0}_{1}".format(
- os_smtp_tls_index, suffix
- )
+ os_aggregate_index = "{0}_{1}".format(os_aggregate_index, suffix)
+ os_forensic_index = "{0}_{1}".format(os_forensic_index, suffix)
+ os_smtp_tls_index = "{0}_{1}".format(os_smtp_tls_index, suffix)
if opts.opensearch_index_prefix:
prefix = opts.opensearch_index_prefix
- os_aggregate_index = "{0}{1}".format(
- prefix, os_aggregate_index)
- os_forensic_index = "{0}{1}".format(
- prefix, os_forensic_index)
- os_smtp_tls_index = "{0}{1}".format(
- prefix, os_smtp_tls_index
- )
- opensearch.set_hosts(opts.opensearch_hosts,
- opts.opensearch_ssl,
- opts.opensearch_ssl_cert_path,
- opts.opensearch_username,
- opts.opensearch_password,
- opts.opensearch_apiKey,
- timeout=opts.opensearch_timeout)
+ os_aggregate_index = "{0}{1}".format(prefix, os_aggregate_index)
+ os_forensic_index = "{0}{1}".format(prefix, os_forensic_index)
+ os_smtp_tls_index = "{0}{1}".format(prefix, os_smtp_tls_index)
+ opensearch.set_hosts(
+ opts.opensearch_hosts,
+ opts.opensearch_ssl,
+ opts.opensearch_ssl_cert_path,
+ opts.opensearch_username,
+ opts.opensearch_password,
+ opts.opensearch_apiKey,
+ timeout=opts.opensearch_timeout,
+ )
opensearch.migrate_indexes(
aggregate_indexes=[os_aggregate_index],
- forensic_indexes=[os_forensic_index])
+ forensic_indexes=[os_forensic_index],
+ )
except opensearch.OpenSearchError:
logger.exception("OpenSearch Error")
exit(1)
@@ -1251,16 +1292,15 @@ def process_reports(reports_):
if opts.hec:
if opts.hec_token is None or opts.hec_index is None:
- logger.error("HEC token and HEC index are required when "
- "using HEC URL")
+ logger.error("HEC token and HEC index are required when " "using HEC URL")
exit(1)
verify = True
if opts.hec_skip_certificate_verification:
verify = False
- hec_client = splunk.HECClient(opts.hec, opts.hec_token,
- opts.hec_index,
- verify=verify)
+ hec_client = splunk.HECClient(
+ opts.hec, opts.hec_token, opts.hec_index, verify=verify
+ )
if opts.kafka_hosts:
try:
@@ -1274,7 +1314,7 @@ def process_reports(reports_):
opts.kafka_hosts,
username=opts.kafka_username,
password=opts.kafka_password,
- ssl_context=ssl_context
+ ssl_context=ssl_context,
)
except Exception as error_:
logger.error("Kafka Error: {0}".format(error_.__str__()))
@@ -1289,15 +1329,17 @@ def process_reports(reports_):
except Exception as error_:
logger.error("GELF Error: {0}".format(error_.__str__()))
- if opts.webhook_aggregate_url or \
- opts.webhook_forensic_url or \
- opts.webhook_smtp_tls_url:
+ if (
+ opts.webhook_aggregate_url
+ or opts.webhook_forensic_url
+ or opts.webhook_smtp_tls_url
+ ):
try:
webhook_client = webhook.WebhookClient(
aggregate_url=opts.webhook_aggregate_url,
forensic_url=opts.webhook_forensic_url,
smtp_tls_url=opts.webhook_smtp_tls_url,
- timeout=opts.webhook_timeout
+ timeout=opts.webhook_timeout,
)
except Exception as error_:
logger.error("Webhook Error: {0}".format(error_.__str__()))
@@ -1333,26 +1375,29 @@ def process_reports(reports_):
connections = []
for proc_index in range(
- opts.n_procs * batch_index,
- opts.n_procs * (batch_index + 1)):
+ opts.n_procs * batch_index, opts.n_procs * (batch_index + 1)
+ ):
if proc_index >= len(file_paths):
break
parent_conn, child_conn = Pipe()
connections.append(parent_conn)
- process = Process(target=cli_parse, args=(
- file_paths[proc_index],
- opts.strip_attachment_payloads,
- opts.nameservers,
- opts.dns_timeout,
- opts.ip_db_path,
- opts.offline,
- opts.always_use_local_files,
- opts.reverse_dns_map_path,
- opts.reverse_dns_map_url,
- child_conn,
- ))
+ process = Process(
+ target=cli_parse,
+ args=(
+ file_paths[proc_index],
+ opts.strip_attachment_payloads,
+ opts.nameservers,
+ opts.dns_timeout,
+ opts.ip_db_path,
+ opts.offline,
+ opts.always_use_local_files,
+ opts.reverse_dns_map_path,
+ opts.reverse_dns_map_url,
+ child_conn,
+ ),
+ )
processes.append(process)
for proc in processes:
@@ -1369,8 +1414,7 @@ def process_reports(reports_):
for result in results:
if type(result[0]) is ParserError:
- logger.error("Failed to parse {0} - {1}".format(result[1],
- result[0]))
+ logger.error("Failed to parse {0} - {1}".format(result[1], result[0]))
else:
if result[0]["report_type"] == "aggregate":
aggregate_reports.append(result[0]["report"])
@@ -1390,7 +1434,8 @@ def process_reports(reports_):
always_use_local_files=opts.always_use_local_files,
reverse_dns_map_path=opts.reverse_dns_map_path,
reverse_dns_map_url=opts.reverse_dns_map_url,
- offline=opts.offline)
+ offline=opts.offline,
+ )
aggregate_reports += reports["aggregate_reports"]
forensic_reports += reports["forensic_reports"]
smtp_tls_reports += reports["smtp_tls_reports"]
@@ -1399,8 +1444,9 @@ def process_reports(reports_):
if opts.imap_host:
try:
if opts.imap_user is None or opts.imap_password is None:
- logger.error("IMAP user and password must be specified if"
- "host is specified")
+ logger.error(
+                    "IMAP user and password must be specified if " "host is specified"
+ )
ssl = True
verify = True
@@ -1437,7 +1483,7 @@ def process_reports(reports_):
username=opts.graph_user,
password=opts.graph_password,
token_file=opts.graph_token_file,
- allow_unencrypted_storage=opts.graph_allow_unencrypted_storage
+ allow_unencrypted_storage=opts.graph_allow_unencrypted_storage,
)
except Exception:
@@ -1446,11 +1492,13 @@ def process_reports(reports_):
if opts.gmail_api_credentials_file:
if opts.mailbox_delete:
- if 'https://mail.google.com/' not in opts.gmail_api_scopes:
- logger.error("Message deletion requires scope"
- " 'https://mail.google.com/'. "
- "Add the scope and remove token file "
- "to acquire proper access.")
+ if "https://mail.google.com/" not in opts.gmail_api_scopes:
+ logger.error(
+ "Message deletion requires scope"
+ " 'https://mail.google.com/'. "
+ "Add the scope and remove token file "
+ "to acquire proper access."
+ )
opts.mailbox_delete = False
try:
@@ -1461,7 +1509,7 @@ def process_reports(reports_):
include_spam_trash=opts.gmail_api_include_spam_trash,
paginate_messages=opts.gmail_api_paginate_messages,
reports_folder=opts.mailbox_reports_folder,
- oauth2_port=opts.gmail_api_oauth2_port
+ oauth2_port=opts.gmail_api_oauth2_port,
)
except Exception:
@@ -1504,9 +1552,13 @@ def process_reports(reports_):
logger.exception("Mailbox Error")
exit(1)
- results = OrderedDict([("aggregate_reports", aggregate_reports),
- ("forensic_reports", forensic_reports),
- ("smtp_tls_reports", smtp_tls_reports)])
+ results = OrderedDict(
+ [
+ ("aggregate_reports", aggregate_reports),
+ ("forensic_reports", forensic_reports),
+ ("smtp_tls_reports", smtp_tls_reports),
+ ]
+ )
process_reports(results)
@@ -1515,11 +1567,17 @@ def process_reports(reports_):
verify = True
if opts.smtp_skip_certificate_verification:
verify = False
- email_results(results, opts.smtp_host, opts.smtp_from,
- opts.smtp_to, port=opts.smtp_port, verify=verify,
- username=opts.smtp_user,
- password=opts.smtp_password,
- subject=opts.smtp_subject)
+ email_results(
+ results,
+ opts.smtp_host,
+ opts.smtp_from,
+ opts.smtp_to,
+ port=opts.smtp_port,
+ verify=verify,
+ username=opts.smtp_user,
+ password=opts.smtp_password,
+ subject=opts.smtp_subject,
+ )
except Exception:
logger.exception("Failed to email results")
exit(1)
@@ -1544,7 +1602,8 @@ def process_reports(reports_):
always_use_local_files=opts.always_use_local_files,
reverse_dns_map_path=opts.reverse_dns_map_path,
reverse_dns_map_url=opts.reverse_dns_map_url,
- offline=opts.offline)
+ offline=opts.offline,
+ )
except FileExistsError as error:
logger.error("{0}".format(error.__str__()))
exit(1)
diff --git a/parsedmarc/elastic.py b/parsedmarc/elastic.py
index 93ce05d8..34d7953c 100644
--- a/parsedmarc/elastic.py
+++ b/parsedmarc/elastic.py
@@ -3,8 +3,20 @@
from collections import OrderedDict
from elasticsearch_dsl.search import Q
-from elasticsearch_dsl import connections, Object, Document, Index, Nested, \
- InnerDoc, Integer, Text, Boolean, Ip, Date, Search
+from elasticsearch_dsl import (
+ connections,
+ Object,
+ Document,
+ Index,
+ Nested,
+ InnerDoc,
+ Integer,
+ Text,
+ Boolean,
+ Ip,
+ Date,
+ Search,
+)
from elasticsearch.helpers import reindex
from parsedmarc.log import logger
@@ -76,24 +88,21 @@ class Index:
spf_results = Nested(_SPFResult)
def add_policy_override(self, type_, comment):
- self.policy_overrides.append(_PolicyOverride(type=type_,
- comment=comment))
+ self.policy_overrides.append(_PolicyOverride(type=type_, comment=comment))
def add_dkim_result(self, domain, selector, result):
- self.dkim_results.append(_DKIMResult(domain=domain,
- selector=selector,
- result=result))
+ self.dkim_results.append(
+ _DKIMResult(domain=domain, selector=selector, result=result)
+ )
def add_spf_result(self, domain, scope, result):
- self.spf_results.append(_SPFResult(domain=domain,
- scope=scope,
- result=result))
+ self.spf_results.append(_SPFResult(domain=domain, scope=scope, result=result))
- def save(self, ** kwargs):
+ def save(self, **kwargs):
self.passed_dmarc = False
self.passed_dmarc = self.spf_aligned or self.dkim_aligned
- return super().save(** kwargs)
+ return super().save(**kwargs)
class _EmailAddressDoc(InnerDoc):
@@ -123,24 +132,25 @@ class _ForensicSampleDoc(InnerDoc):
attachments = Nested(_EmailAttachmentDoc)
def add_to(self, display_name, address):
- self.to.append(_EmailAddressDoc(display_name=display_name,
- address=address))
+ self.to.append(_EmailAddressDoc(display_name=display_name, address=address))
def add_reply_to(self, display_name, address):
- self.reply_to.append(_EmailAddressDoc(display_name=display_name,
- address=address))
+ self.reply_to.append(
+ _EmailAddressDoc(display_name=display_name, address=address)
+ )
def add_cc(self, display_name, address):
- self.cc.append(_EmailAddressDoc(display_name=display_name,
- address=address))
+ self.cc.append(_EmailAddressDoc(display_name=display_name, address=address))
def add_bcc(self, display_name, address):
- self.bcc.append(_EmailAddressDoc(display_name=display_name,
- address=address))
+ self.bcc.append(_EmailAddressDoc(display_name=display_name, address=address))
def add_attachment(self, filename, content_type, sha256):
- self.attachments.append(_EmailAttachmentDoc(filename=filename,
- content_type=content_type, sha256=sha256))
+ self.attachments.append(
+ _EmailAttachmentDoc(
+ filename=filename, content_type=content_type, sha256=sha256
+ )
+ )
class _ForensicReportDoc(Document):
@@ -185,14 +195,18 @@ class _SMTPTLSPolicyDoc(InnerDoc):
failed_session_count = Integer()
failure_details = Nested(_SMTPTLSFailureDetailsDoc)
- def add_failure_details(self, result_type, ip_address,
- receiving_ip,
- receiving_mx_helo,
- failed_session_count,
- sending_mta_ip=None,
- receiving_mx_hostname=None,
- additional_information_uri=None,
- failure_reason_code=None):
+ def add_failure_details(
+ self,
+ result_type,
+ ip_address,
+ receiving_ip,
+ receiving_mx_helo,
+ failed_session_count,
+ sending_mta_ip=None,
+ receiving_mx_hostname=None,
+ additional_information_uri=None,
+ failure_reason_code=None,
+ ):
_details = _SMTPTLSFailureDetailsDoc(
result_type=result_type,
ip_address=ip_address,
@@ -202,13 +216,12 @@ def add_failure_details(self, result_type, ip_address,
receiving_ip=receiving_ip,
failed_session_count=failed_session_count,
additional_information=additional_information_uri,
- failure_reason_code=failure_reason_code
+ failure_reason_code=failure_reason_code,
)
self.failure_details.append(_details)
class _SMTPTLSReportDoc(Document):
-
class Index:
name = "smtp_tls"
@@ -220,27 +233,40 @@ class Index:
report_id = Text()
policies = Nested(_SMTPTLSPolicyDoc)
- def add_policy(self, policy_type, policy_domain,
- successful_session_count,
- failed_session_count,
- policy_string=None,
- mx_host_patterns=None,
- failure_details=None):
- self.policies.append(policy_type=policy_type,
- policy_domain=policy_domain,
- successful_session_count=successful_session_count,
- failed_session_count=failed_session_count,
- policy_string=policy_string,
- mx_host_patterns=mx_host_patterns,
- failure_details=failure_details)
+ def add_policy(
+ self,
+ policy_type,
+ policy_domain,
+ successful_session_count,
+ failed_session_count,
+ policy_string=None,
+ mx_host_patterns=None,
+ failure_details=None,
+ ):
+ self.policies.append(
+ policy_type=policy_type,
+ policy_domain=policy_domain,
+ successful_session_count=successful_session_count,
+ failed_session_count=failed_session_count,
+ policy_string=policy_string,
+ mx_host_patterns=mx_host_patterns,
+ failure_details=failure_details,
+ )
class AlreadySaved(ValueError):
"""Raised when a report to be saved matches an existing report"""
-def set_hosts(hosts, use_ssl=False, ssl_cert_path=None,
- username=None, password=None, apiKey=None, timeout=60.0):
+def set_hosts(
+ hosts,
+ use_ssl=False,
+ ssl_cert_path=None,
+ username=None,
+ password=None,
+ apiKey=None,
+ timeout=60.0,
+):
"""
Sets the Elasticsearch hosts to use
@@ -255,21 +281,18 @@ def set_hosts(hosts, use_ssl=False, ssl_cert_path=None,
"""
if not isinstance(hosts, list):
hosts = [hosts]
- conn_params = {
- "hosts": hosts,
- "timeout": timeout
- }
+ conn_params = {"hosts": hosts, "timeout": timeout}
if use_ssl:
- conn_params['use_ssl'] = True
+ conn_params["use_ssl"] = True
if ssl_cert_path:
- conn_params['verify_certs'] = True
- conn_params['ca_certs'] = ssl_cert_path
+ conn_params["verify_certs"] = True
+ conn_params["ca_certs"] = ssl_cert_path
else:
- conn_params['verify_certs'] = False
+ conn_params["verify_certs"] = False
if username:
- conn_params['http_auth'] = (username+":"+password)
+ conn_params["http_auth"] = username + ":" + password
if apiKey:
- conn_params['api_key'] = apiKey
+ conn_params["api_key"] = apiKey
connections.create_connection(**conn_params)
@@ -288,14 +311,12 @@ def create_indexes(names, settings=None):
if not index.exists():
logger.debug("Creating Elasticsearch index: {0}".format(name))
if settings is None:
- index.settings(number_of_shards=1,
- number_of_replicas=0)
+ index.settings(number_of_shards=1, number_of_replicas=0)
else:
index.settings(**settings)
index.create()
except Exception as e:
- raise ElasticsearchError(
- "Elasticsearch error: {0}".format(e.__str__()))
+ raise ElasticsearchError("Elasticsearch error: {0}".format(e.__str__()))
def migrate_indexes(aggregate_indexes=None, forensic_indexes=None):
@@ -327,33 +348,31 @@ def migrate_indexes(aggregate_indexes=None, forensic_indexes=None):
fo_type = fo_mapping["type"]
if fo_type == "long":
new_index_name = "{0}-v{1}".format(aggregate_index_name, version)
- body = {"properties": {"published_policy.fo": {
- "type": "text",
- "fields": {
- "keyword": {
- "type": "keyword",
- "ignore_above": 256
+ body = {
+ "properties": {
+ "published_policy.fo": {
+ "type": "text",
+ "fields": {"keyword": {"type": "keyword", "ignore_above": 256}},
}
}
}
- }
- }
Index(new_index_name).create()
Index(new_index_name).put_mapping(doc_type=doc, body=body)
- reindex(connections.get_connection(), aggregate_index_name,
- new_index_name)
+ reindex(connections.get_connection(), aggregate_index_name, new_index_name)
Index(aggregate_index_name).delete()
for forensic_index in forensic_indexes:
pass
-def save_aggregate_report_to_elasticsearch(aggregate_report,
- index_suffix=None,
- index_prefix=None,
- monthly_indexes=False,
- number_of_shards=1,
- number_of_replicas=0):
+def save_aggregate_report_to_elasticsearch(
+ aggregate_report,
+ index_suffix=None,
+ index_prefix=None,
+ monthly_indexes=False,
+ number_of_shards=1,
+ number_of_replicas=0,
+):
"""
Saves a parsed DMARC aggregate report to Elasticsearch
@@ -374,10 +393,8 @@ def save_aggregate_report_to_elasticsearch(aggregate_report,
org_name = metadata["org_name"]
report_id = metadata["report_id"]
domain = aggregate_report["policy_published"]["domain"]
- begin_date = human_timestamp_to_datetime(metadata["begin_date"],
- to_utc=True)
- end_date = human_timestamp_to_datetime(metadata["end_date"],
- to_utc=True)
+ begin_date = human_timestamp_to_datetime(metadata["begin_date"], to_utc=True)
+ end_date = human_timestamp_to_datetime(metadata["end_date"], to_utc=True)
begin_date_human = begin_date.strftime("%Y-%m-%d %H:%M:%SZ")
end_date_human = end_date.strftime("%Y-%m-%d %H:%M:%SZ")
if monthly_indexes:
@@ -386,8 +403,7 @@ def save_aggregate_report_to_elasticsearch(aggregate_report,
index_date = begin_date.strftime("%Y-%m-%d")
aggregate_report["begin_date"] = begin_date
aggregate_report["end_date"] = end_date
- date_range = [aggregate_report["begin_date"],
- aggregate_report["end_date"]]
+ date_range = [aggregate_report["begin_date"], aggregate_report["end_date"]]
org_name_query = Q(dict(match_phrase=dict(org_name=org_name)))
report_id_query = Q(dict(match_phrase=dict(report_id=report_id)))
@@ -409,18 +425,20 @@ def save_aggregate_report_to_elasticsearch(aggregate_report,
try:
existing = search.execute()
except Exception as error_:
- raise ElasticsearchError("Elasticsearch's search for existing report \
- error: {}".format(error_.__str__()))
+ raise ElasticsearchError(
+ "Elasticsearch's search for existing report \
+ error: {}".format(error_.__str__())
+ )
if len(existing) > 0:
- raise AlreadySaved("An aggregate report ID {0} from {1} about {2} "
- "with a date range of {3} UTC to {4} UTC already "
- "exists in "
- "Elasticsearch".format(report_id,
- org_name,
- domain,
- begin_date_human,
- end_date_human))
+ raise AlreadySaved(
+ "An aggregate report ID {0} from {1} about {2} "
+ "with a date range of {3} UTC to {4} UTC already "
+ "exists in "
+ "Elasticsearch".format(
+ report_id, org_name, domain, begin_date_human, end_date_human
+ )
+ )
published_policy = _PublishedPolicy(
domain=aggregate_report["policy_published"]["domain"],
adkim=aggregate_report["policy_published"]["adkim"],
@@ -428,7 +446,7 @@ def save_aggregate_report_to_elasticsearch(aggregate_report,
p=aggregate_report["policy_published"]["p"],
sp=aggregate_report["policy_published"]["sp"],
pct=aggregate_report["policy_published"]["pct"],
- fo=aggregate_report["policy_published"]["fo"]
+ fo=aggregate_report["policy_published"]["fo"],
)
for record in aggregate_report["records"]:
@@ -451,28 +469,33 @@ def save_aggregate_report_to_elasticsearch(aggregate_report,
source_name=record["source"]["name"],
message_count=record["count"],
disposition=record["policy_evaluated"]["disposition"],
- dkim_aligned=record["policy_evaluated"]["dkim"] is not None and
- record["policy_evaluated"]["dkim"].lower() == "pass",
- spf_aligned=record["policy_evaluated"]["spf"] is not None and
- record["policy_evaluated"]["spf"].lower() == "pass",
+ dkim_aligned=record["policy_evaluated"]["dkim"] is not None
+ and record["policy_evaluated"]["dkim"].lower() == "pass",
+ spf_aligned=record["policy_evaluated"]["spf"] is not None
+ and record["policy_evaluated"]["spf"].lower() == "pass",
header_from=record["identifiers"]["header_from"],
envelope_from=record["identifiers"]["envelope_from"],
- envelope_to=record["identifiers"]["envelope_to"]
+ envelope_to=record["identifiers"]["envelope_to"],
)
for override in record["policy_evaluated"]["policy_override_reasons"]:
- agg_doc.add_policy_override(type_=override["type"],
- comment=override["comment"])
+ agg_doc.add_policy_override(
+ type_=override["type"], comment=override["comment"]
+ )
for dkim_result in record["auth_results"]["dkim"]:
- agg_doc.add_dkim_result(domain=dkim_result["domain"],
- selector=dkim_result["selector"],
- result=dkim_result["result"])
+ agg_doc.add_dkim_result(
+ domain=dkim_result["domain"],
+ selector=dkim_result["selector"],
+ result=dkim_result["result"],
+ )
for spf_result in record["auth_results"]["spf"]:
- agg_doc.add_spf_result(domain=spf_result["domain"],
- scope=spf_result["scope"],
- result=spf_result["result"])
+ agg_doc.add_spf_result(
+ domain=spf_result["domain"],
+ scope=spf_result["scope"],
+ result=spf_result["result"],
+ )
index = "dmarc_aggregate"
if index_suffix:
@@ -481,41 +504,43 @@ def save_aggregate_report_to_elasticsearch(aggregate_report,
index = "{0}{1}".format(index_prefix, index)
index = "{0}-{1}".format(index, index_date)
- index_settings = dict(number_of_shards=number_of_shards,
- number_of_replicas=number_of_replicas)
+ index_settings = dict(
+ number_of_shards=number_of_shards, number_of_replicas=number_of_replicas
+ )
create_indexes([index], index_settings)
agg_doc.meta.index = index
try:
agg_doc.save()
except Exception as e:
- raise ElasticsearchError(
- "Elasticsearch error: {0}".format(e.__str__()))
+ raise ElasticsearchError("Elasticsearch error: {0}".format(e.__str__()))
-def save_forensic_report_to_elasticsearch(forensic_report,
- index_suffix=None,
- index_prefix=None,
- monthly_indexes=False,
- number_of_shards=1,
- number_of_replicas=0):
+def save_forensic_report_to_elasticsearch(
+ forensic_report,
+ index_suffix=None,
+ index_prefix=None,
+ monthly_indexes=False,
+ number_of_shards=1,
+ number_of_replicas=0,
+):
"""
- Saves a parsed DMARC forensic report to Elasticsearch
-
- Args:
- forensic_report (OrderedDict): A parsed forensic report
- index_suffix (str): The suffix of the name of the index to save to
- index_prefix (str): The prefix of the name of the index to save to
- monthly_indexes (bool): Use monthly indexes instead of daily
- indexes
- number_of_shards (int): The number of shards to use in the index
- number_of_replicas (int): The number of replicas to use in the
- index
-
- Raises:
- AlreadySaved
+ Saves a parsed DMARC forensic report to Elasticsearch
- """
+ Args:
+ forensic_report (OrderedDict): A parsed forensic report
+ index_suffix (str): The suffix of the name of the index to save to
+ index_prefix (str): The prefix of the name of the index to save to
+ monthly_indexes (bool): Use monthly indexes instead of daily
+ indexes
+ number_of_shards (int): The number of shards to use in the index
+ number_of_replicas (int): The number of replicas to use in the
+ index
+
+ Raises:
+ AlreadySaved
+
+ """
logger.info("Saving forensic report to Elasticsearch")
forensic_report = forensic_report.copy()
sample_date = None
@@ -560,14 +585,12 @@ def save_forensic_report_to_elasticsearch(forensic_report,
existing = search.execute()
if len(existing) > 0:
- raise AlreadySaved("A forensic sample to {0} from {1} "
- "with a subject of {2} and arrival date of {3} "
- "already exists in "
- "Elasticsearch".format(to_,
- from_,
- subject,
- arrival_date_human
- ))
+ raise AlreadySaved(
+ "A forensic sample to {0} from {1} "
+ "with a subject of {2} and arrival date of {3} "
+ "already exists in "
+ "Elasticsearch".format(to_, from_, subject, arrival_date_human)
+ )
parsed_sample = forensic_report["parsed_sample"]
sample = _ForensicSampleDoc(
@@ -577,25 +600,25 @@ def save_forensic_report_to_elasticsearch(forensic_report,
date=sample_date,
subject=forensic_report["parsed_sample"]["subject"],
filename_safe_subject=parsed_sample["filename_safe_subject"],
- body=forensic_report["parsed_sample"]["body"]
+ body=forensic_report["parsed_sample"]["body"],
)
for address in forensic_report["parsed_sample"]["to"]:
- sample.add_to(display_name=address["display_name"],
- address=address["address"])
+ sample.add_to(display_name=address["display_name"], address=address["address"])
for address in forensic_report["parsed_sample"]["reply_to"]:
- sample.add_reply_to(display_name=address["display_name"],
- address=address["address"])
+ sample.add_reply_to(
+ display_name=address["display_name"], address=address["address"]
+ )
for address in forensic_report["parsed_sample"]["cc"]:
- sample.add_cc(display_name=address["display_name"],
- address=address["address"])
+ sample.add_cc(display_name=address["display_name"], address=address["address"])
for address in forensic_report["parsed_sample"]["bcc"]:
- sample.add_bcc(display_name=address["display_name"],
- address=address["address"])
+ sample.add_bcc(display_name=address["display_name"], address=address["address"])
for attachment in forensic_report["parsed_sample"]["attachments"]:
- sample.add_attachment(filename=attachment["filename"],
- content_type=attachment["mail_content_type"],
- sha256=attachment["sha256"])
+ sample.add_attachment(
+ filename=attachment["filename"],
+ content_type=attachment["mail_content_type"],
+ sha256=attachment["sha256"],
+ )
try:
forensic_doc = _ForensicReportDoc(
feedback_type=forensic_report["feedback_type"],
@@ -611,12 +634,11 @@ def save_forensic_report_to_elasticsearch(forensic_report,
source_country=forensic_report["source"]["country"],
source_reverse_dns=forensic_report["source"]["reverse_dns"],
source_base_domain=forensic_report["source"]["base_domain"],
- authentication_mechanisms=forensic_report[
- "authentication_mechanisms"],
+ authentication_mechanisms=forensic_report["authentication_mechanisms"],
auth_failure=forensic_report["auth_failure"],
dkim_domain=forensic_report["dkim_domain"],
original_rcpt_to=forensic_report["original_rcpt_to"],
- sample=sample
+ sample=sample,
)
index = "dmarc_forensic"
@@ -629,26 +651,29 @@ def save_forensic_report_to_elasticsearch(forensic_report,
else:
index_date = arrival_date.strftime("%Y-%m-%d")
index = "{0}-{1}".format(index, index_date)
- index_settings = dict(number_of_shards=number_of_shards,
- number_of_replicas=number_of_replicas)
+ index_settings = dict(
+ number_of_shards=number_of_shards, number_of_replicas=number_of_replicas
+ )
create_indexes([index], index_settings)
forensic_doc.meta.index = index
try:
forensic_doc.save()
except Exception as e:
- raise ElasticsearchError(
- "Elasticsearch error: {0}".format(e.__str__()))
+ raise ElasticsearchError("Elasticsearch error: {0}".format(e.__str__()))
except KeyError as e:
raise InvalidForensicReport(
- "Forensic report missing required field: {0}".format(e.__str__()))
+ "Forensic report missing required field: {0}".format(e.__str__())
+ )
-def save_smtp_tls_report_to_elasticsearch(report,
- index_suffix=None,
- index_prefix=None,
- monthly_indexes=False,
- number_of_shards=1,
- number_of_replicas=0):
+def save_smtp_tls_report_to_elasticsearch(
+ report,
+ index_suffix=None,
+ index_prefix=None,
+ monthly_indexes=False,
+ number_of_shards=1,
+ number_of_replicas=0,
+):
"""
Saves a parsed SMTP TLS report to Elasticsearch
@@ -666,10 +691,8 @@ def save_smtp_tls_report_to_elasticsearch(report,
logger.info("Saving smtp tls report to Elasticsearch")
org_name = report["organization_name"]
report_id = report["report_id"]
- begin_date = human_timestamp_to_datetime(report["begin_date"],
- to_utc=True)
- end_date = human_timestamp_to_datetime(report["end_date"],
- to_utc=True)
+ begin_date = human_timestamp_to_datetime(report["begin_date"], to_utc=True)
+ end_date = human_timestamp_to_datetime(report["end_date"], to_utc=True)
begin_date_human = begin_date.strftime("%Y-%m-%d %H:%M:%SZ")
end_date_human = end_date.strftime("%Y-%m-%d %H:%M:%SZ")
if monthly_indexes:
@@ -698,15 +721,19 @@ def save_smtp_tls_report_to_elasticsearch(report,
try:
existing = search.execute()
except Exception as error_:
- raise ElasticsearchError("Elasticsearch's search for existing report \
- error: {}".format(error_.__str__()))
+ raise ElasticsearchError(
+ "Elasticsearch's search for existing report \
+ error: {}".format(error_.__str__())
+ )
if len(existing) > 0:
- raise AlreadySaved(f"An SMTP TLS report ID {report_id} from "
- f" {org_name} with a date range of "
- f"{begin_date_human} UTC to "
- f"{end_date_human} UTC already "
- "exists in Elasticsearch")
+ raise AlreadySaved(
+ f"An SMTP TLS report ID {report_id} from "
+ f" {org_name} with a date range of "
+ f"{begin_date_human} UTC to "
+ f"{end_date_human} UTC already "
+ "exists in Elasticsearch"
+ )
index = "smtp_tls"
if index_suffix:
@@ -714,8 +741,9 @@ def save_smtp_tls_report_to_elasticsearch(report,
if index_prefix:
index = "{0}{1}".format(index_prefix, index)
index = "{0}-{1}".format(index, index_date)
- index_settings = dict(number_of_shards=number_of_shards,
- number_of_replicas=number_of_replicas)
+ index_settings = dict(
+ number_of_shards=number_of_shards, number_of_replicas=number_of_replicas
+ )
smtp_tls_doc = _SMTPTLSReportDoc(
org_name=report["organization_name"],
@@ -723,10 +751,10 @@ def save_smtp_tls_report_to_elasticsearch(report,
date_begin=report["begin_date"],
date_end=report["end_date"],
contact_info=report["contact_info"],
- report_id=report["report_id"]
+ report_id=report["report_id"],
)
- for policy in report['policies']:
+ for policy in report["policies"]:
policy_strings = None
mx_host_patterns = None
if "policy_strings" in policy:
@@ -739,7 +767,7 @@ def save_smtp_tls_report_to_elasticsearch(report,
succesful_session_count=policy["successful_session_count"],
failed_session_count=policy["failed_session_count"],
policy_string=policy_strings,
- mx_host_patterns=mx_host_patterns
+ mx_host_patterns=mx_host_patterns,
)
if "failure_details" in policy:
for failure_detail in policy["failure_details"]:
@@ -752,11 +780,11 @@ def save_smtp_tls_report_to_elasticsearch(report,
sending_mta_ip = None
if "receiving_mx_hostname" in failure_detail:
- receiving_mx_hostname = failure_detail[
- "receiving_mx_hostname"]
+ receiving_mx_hostname = failure_detail["receiving_mx_hostname"]
if "additional_information_uri" in failure_detail:
additional_information_uri = failure_detail[
- "additional_information_uri"]
+ "additional_information_uri"
+ ]
if "failure_reason_code" in failure_detail:
failure_reason_code = failure_detail["failure_reason_code"]
if "ip_address" in failure_detail:
@@ -772,12 +800,11 @@ def save_smtp_tls_report_to_elasticsearch(report,
ip_address=ip_address,
receiving_ip=receiving_ip,
receiving_mx_helo=receiving_mx_helo,
- failed_session_count=failure_detail[
- "failed_session_count"],
+ failed_session_count=failure_detail["failed_session_count"],
sending_mta_ip=sending_mta_ip,
receiving_mx_hostname=receiving_mx_hostname,
additional_information_uri=additional_information_uri,
- failure_reason_code=failure_reason_code
+ failure_reason_code=failure_reason_code,
)
smtp_tls_doc.policies.append(policy_doc)
@@ -787,5 +814,4 @@ def save_smtp_tls_report_to_elasticsearch(report,
try:
smtp_tls_doc.save()
except Exception as e:
- raise ElasticsearchError(
- "Elasticsearch error: {0}".format(e.__str__()))
+ raise ElasticsearchError("Elasticsearch error: {0}".format(e.__str__()))
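# Usage sketch for the Elasticsearch helpers reformatted above: a minimal
# example, assuming the module path is parsedmarc.elastic (with AlreadySaved
# and ElasticsearchError importable from it) and a cluster reachable at the
# placeholder address below. `aggregate_report` stands in for a single report
# dict produced by the parser.
from parsedmarc.elastic import (
    AlreadySaved,
    ElasticsearchError,
    save_aggregate_report_to_elasticsearch,
    set_hosts,
)


def index_aggregate_report(aggregate_report):
    # One connection per process; credentials and TLS settings are optional.
    set_hosts(["localhost:9200"], use_ssl=False, timeout=60.0)
    try:
        save_aggregate_report_to_elasticsearch(
            aggregate_report,
            monthly_indexes=True,  # one index per month instead of per day
            number_of_shards=1,
            number_of_replicas=0,
        )
    except AlreadySaved as warning:
        print(warning)  # duplicate report IDs are skipped, not re-indexed
    except ElasticsearchError as error:
        print(error)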
diff --git a/parsedmarc/gelf.py b/parsedmarc/gelf.py
index a74928c5..9e5c9dac 100644
--- a/parsedmarc/gelf.py
+++ b/parsedmarc/gelf.py
@@ -5,8 +5,11 @@
import json
import threading
-from parsedmarc import parsed_aggregate_reports_to_csv_rows, \
- parsed_forensic_reports_to_csv_rows, parsed_smtp_tls_reports_to_csv_rows
+from parsedmarc import (
+ parsed_aggregate_reports_to_csv_rows,
+ parsed_forensic_reports_to_csv_rows,
+ parsed_smtp_tls_reports_to_csv_rows,
+)
from pygelf import GelfTcpHandler, GelfUdpHandler, GelfTlsHandler
@@ -14,7 +17,6 @@
class ContextFilter(logging.Filter):
-
def filter(self, record):
record.parsedmarc = log_context_data.parsedmarc
return True
@@ -33,23 +35,24 @@ def __init__(self, host, port, mode):
"""
self.host = host
self.port = port
- self.logger = logging.getLogger('parsedmarc_syslog')
+ self.logger = logging.getLogger("parsedmarc_syslog")
self.logger.setLevel(logging.INFO)
self.logger.addFilter(ContextFilter())
self.gelf_mode = {
- 'udp': GelfUdpHandler,
- 'tcp': GelfTcpHandler,
- 'tls': GelfTlsHandler,
+ "udp": GelfUdpHandler,
+ "tcp": GelfTcpHandler,
+ "tls": GelfTlsHandler,
}
- self.handler = self.gelf_mode[mode](host=self.host, port=self.port,
- include_extra_fields=True)
+ self.handler = self.gelf_mode[mode](
+ host=self.host, port=self.port, include_extra_fields=True
+ )
self.logger.addHandler(self.handler)
def save_aggregate_report_to_gelf(self, aggregate_reports):
rows = parsed_aggregate_reports_to_csv_rows(aggregate_reports)
for row in rows:
log_context_data.parsedmarc = row
- self.logger.info('parsedmarc aggregate report')
+ self.logger.info("parsedmarc aggregate report")
log_context_data.parsedmarc = None
diff --git a/parsedmarc/kafkaclient.py b/parsedmarc/kafkaclient.py
index 68eef1f4..35d1c2dd 100644
--- a/parsedmarc/kafkaclient.py
+++ b/parsedmarc/kafkaclient.py
@@ -17,8 +17,9 @@ class KafkaError(RuntimeError):
class KafkaClient(object):
- def __init__(self, kafka_hosts, ssl=False, username=None,
- password=None, ssl_context=None):
+ def __init__(
+ self, kafka_hosts, ssl=False, username=None, password=None, ssl_context=None
+ ):
"""
Initializes the Kafka client
Args:
@@ -37,10 +38,11 @@ def __init__(self, kafka_hosts, ssl=False, username=None,
``$ConnectionString``, and the password is the
Azure Event Hub connection string.
"""
- config = dict(value_serializer=lambda v: json.dumps(v).encode(
- 'utf-8'),
- bootstrap_servers=kafka_hosts,
- client_id="parsedmarc-{0}".format(__version__))
+ config = dict(
+ value_serializer=lambda v: json.dumps(v).encode("utf-8"),
+ bootstrap_servers=kafka_hosts,
+ client_id="parsedmarc-{0}".format(__version__),
+ )
if ssl or username or password:
config["security_protocol"] = "SSL"
config["ssl_context"] = ssl_context or create_default_context()
@@ -55,14 +57,14 @@ def __init__(self, kafka_hosts, ssl=False, username=None,
@staticmethod
def strip_metadata(report):
"""
- Duplicates org_name, org_email and report_id into JSON root
- and removes report_metadata key to bring it more inline
- with Elastic output.
+ Duplicates org_name, org_email and report_id into JSON root
+ and removes report_metadata key to bring it more in line
+ with Elastic output.
"""
- report['org_name'] = report['report_metadata']['org_name']
- report['org_email'] = report['report_metadata']['org_email']
- report['report_id'] = report['report_metadata']['report_id']
- report.pop('report_metadata')
+ report["org_name"] = report["report_metadata"]["org_name"]
+ report["org_email"] = report["report_metadata"]["org_email"]
+ report["report_id"] = report["report_metadata"]["report_id"]
+ report.pop("report_metadata")
return report
@@ -80,13 +82,11 @@ def generate_daterange(report):
end_date = human_timestamp_to_datetime(metadata["end_date"])
begin_date_human = begin_date.strftime("%Y-%m-%dT%H:%M:%S")
end_date_human = end_date.strftime("%Y-%m-%dT%H:%M:%S")
- date_range = [begin_date_human,
- end_date_human]
+ date_range = [begin_date_human, end_date_human]
logger.debug("date_range is {}".format(date_range))
return date_range
- def save_aggregate_reports_to_kafka(self, aggregate_reports,
- aggregate_topic):
+ def save_aggregate_reports_to_kafka(self, aggregate_reports, aggregate_topic):
"""
Saves aggregate DMARC reports to Kafka
@@ -96,38 +96,38 @@ def save_aggregate_reports_to_kafka(self, aggregate_reports,
aggregate_topic (str): The name of the Kafka topic
"""
- if (isinstance(aggregate_reports, dict) or
- isinstance(aggregate_reports, OrderedDict)):
+ if isinstance(aggregate_reports, dict) or isinstance(
+ aggregate_reports, OrderedDict
+ ):
aggregate_reports = [aggregate_reports]
if len(aggregate_reports) < 1:
return
for report in aggregate_reports:
- report['date_range'] = self.generate_daterange(report)
+ report["date_range"] = self.generate_daterange(report)
report = self.strip_metadata(report)
- for slice in report['records']:
- slice['date_range'] = report['date_range']
- slice['org_name'] = report['org_name']
- slice['org_email'] = report['org_email']
- slice['policy_published'] = report['policy_published']
- slice['report_id'] = report['report_id']
+ for slice in report["records"]:
+ slice["date_range"] = report["date_range"]
+ slice["org_name"] = report["org_name"]
+ slice["org_email"] = report["org_email"]
+ slice["policy_published"] = report["policy_published"]
+ slice["report_id"] = report["report_id"]
logger.debug("Sending slice.")
try:
logger.debug("Saving aggregate report to Kafka")
self.producer.send(aggregate_topic, slice)
except UnknownTopicOrPartitionError:
raise KafkaError(
- "Kafka error: Unknown topic or partition on broker")
+ "Kafka error: Unknown topic or partition on broker"
+ )
except Exception as e:
- raise KafkaError(
- "Kafka error: {0}".format(e.__str__()))
+ raise KafkaError("Kafka error: {0}".format(e.__str__()))
try:
self.producer.flush()
except Exception as e:
- raise KafkaError(
- "Kafka error: {0}".format(e.__str__()))
+ raise KafkaError("Kafka error: {0}".format(e.__str__()))
def save_forensic_reports_to_kafka(self, forensic_reports, forensic_topic):
"""
@@ -151,16 +151,13 @@ def save_forensic_reports_to_kafka(self, forensic_reports, forensic_topic):
logger.debug("Saving forensic reports to Kafka")
self.producer.send(forensic_topic, forensic_reports)
except UnknownTopicOrPartitionError:
- raise KafkaError(
- "Kafka error: Unknown topic or partition on broker")
+ raise KafkaError("Kafka error: Unknown topic or partition on broker")
except Exception as e:
- raise KafkaError(
- "Kafka error: {0}".format(e.__str__()))
+ raise KafkaError("Kafka error: {0}".format(e.__str__()))
try:
self.producer.flush()
except Exception as e:
- raise KafkaError(
- "Kafka error: {0}".format(e.__str__()))
+ raise KafkaError("Kafka error: {0}".format(e.__str__()))
def save_smtp_tls_reports_to_kafka(self, smtp_tls_reports, smtp_tls_topic):
"""
@@ -184,13 +181,10 @@ def save_smtp_tls_reports_to_kafka(self, smtp_tls_reports, smtp_tls_topic):
logger.debug("Saving forensic reports to Kafka")
self.producer.send(smtp_tls_topic, smtp_tls_reports)
except UnknownTopicOrPartitionError:
- raise KafkaError(
- "Kafka error: Unknown topic or partition on broker")
+ raise KafkaError("Kafka error: Unknown topic or partition on broker")
except Exception as e:
- raise KafkaError(
- "Kafka error: {0}".format(e.__str__()))
+ raise KafkaError("Kafka error: {0}".format(e.__str__()))
try:
self.producer.flush()
except Exception as e:
- raise KafkaError(
- "Kafka error: {0}".format(e.__str__()))
+ raise KafkaError("Kafka error: {0}".format(e.__str__()))
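# Usage sketch for the Kafka client reformatted above; the broker address,
# credentials and topic name are placeholders.
from parsedmarc.kafkaclient import KafkaClient, KafkaError


def ship_aggregate_reports(aggregate_reports):
    client = KafkaClient(
        ["kafka-1.example.com:9093"],
        ssl=True,
        username="parsedmarc",
        password="placeholder-password",
    )
    try:
        client.save_aggregate_reports_to_kafka(aggregate_reports, "dmarc_aggregate")
    except KafkaError as error:
        print(error)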
diff --git a/parsedmarc/loganalytics.py b/parsedmarc/loganalytics.py
index 13996132..3192f4dc 100644
--- a/parsedmarc/loganalytics.py
+++ b/parsedmarc/loganalytics.py
@@ -9,7 +9,7 @@ class LogAnalyticsException(Exception):
"""Raised when an Elasticsearch error occurs"""
-class LogAnalyticsConfig():
+class LogAnalyticsConfig:
"""
The LogAnalyticsConfig class is used to define the configuration
for the Log Analytics Client.
@@ -41,16 +41,18 @@ class LogAnalyticsConfig():
the SMTP TLS Reports
need to be pushed.
"""
+
def __init__(
- self,
- client_id: str,
- client_secret: str,
- tenant_id: str,
- dce: str,
- dcr_immutable_id: str,
- dcr_aggregate_stream: str,
- dcr_forensic_stream: str,
- dcr_smtp_tls_stream: str):
+ self,
+ client_id: str,
+ client_secret: str,
+ tenant_id: str,
+ dce: str,
+ dcr_immutable_id: str,
+ dcr_aggregate_stream: str,
+ dcr_forensic_stream: str,
+ dcr_smtp_tls_stream: str,
+ ):
self.client_id = client_id
self.client_secret = client_secret
self.tenant_id = tenant_id
@@ -67,16 +69,18 @@ class LogAnalyticsClient(object):
the generated DMARC reports to Log Analytics
via Data Collection Rules.
"""
+
def __init__(
- self,
- client_id: str,
- client_secret: str,
- tenant_id: str,
- dce: str,
- dcr_immutable_id: str,
- dcr_aggregate_stream: str,
- dcr_forensic_stream: str,
- dcr_smtp_tls_stream: str):
+ self,
+ client_id: str,
+ client_secret: str,
+ tenant_id: str,
+ dce: str,
+ dcr_immutable_id: str,
+ dcr_aggregate_stream: str,
+ dcr_forensic_stream: str,
+ dcr_smtp_tls_stream: str,
+ ):
self.conf = LogAnalyticsConfig(
client_id=client_id,
client_secret=client_secret,
@@ -85,23 +89,20 @@ def __init__(
dcr_immutable_id=dcr_immutable_id,
dcr_aggregate_stream=dcr_aggregate_stream,
dcr_forensic_stream=dcr_forensic_stream,
- dcr_smtp_tls_stream=dcr_smtp_tls_stream
+ dcr_smtp_tls_stream=dcr_smtp_tls_stream,
)
if (
- not self.conf.client_id or
- not self.conf.client_secret or
- not self.conf.tenant_id or
- not self.conf.dce or
- not self.conf.dcr_immutable_id):
+ not self.conf.client_id
+ or not self.conf.client_secret
+ or not self.conf.tenant_id
+ or not self.conf.dce
+ or not self.conf.dcr_immutable_id
+ ):
raise LogAnalyticsException(
- "Invalid configuration. " +
- "One or more required settings are missing.")
+ "Invalid configuration. " + "One or more required settings are missing."
+ )
- def publish_json(
- self,
- results,
- logs_client: LogsIngestionClient,
- dcr_stream: str):
+ def publish_json(self, results, logs_client: LogsIngestionClient, dcr_stream: str):
"""
Background function to publish given
DMARC report to specific Data Collection Rule.
@@ -117,16 +118,10 @@ def publish_json(
try:
logs_client.upload(self.conf.dcr_immutable_id, dcr_stream, results)
except HttpResponseError as e:
- raise LogAnalyticsException(
- "Upload failed: {error}"
- .format(error=e))
+ raise LogAnalyticsException("Upload failed: {error}".format(error=e))
def publish_results(
- self,
- results,
- save_aggregate: bool,
- save_forensic: bool,
- save_smtp_tls: bool
+ self, results, save_aggregate: bool, save_forensic: bool, save_smtp_tls: bool
):
"""
Function to publish DMARC and/or SMTP TLS reports to Log Analytics
@@ -148,39 +143,39 @@ def publish_results(
credential = ClientSecretCredential(
tenant_id=conf.tenant_id,
client_id=conf.client_id,
- client_secret=conf.client_secret
+ client_secret=conf.client_secret,
)
logs_client = LogsIngestionClient(conf.dce, credential=credential)
if (
- results['aggregate_reports'] and
- conf.dcr_aggregate_stream and
- len(results['aggregate_reports']) > 0 and
- save_aggregate):
+ results["aggregate_reports"]
+ and conf.dcr_aggregate_stream
+ and len(results["aggregate_reports"]) > 0
+ and save_aggregate
+ ):
logger.info("Publishing aggregate reports.")
self.publish_json(
- results['aggregate_reports'],
- logs_client,
- conf.dcr_aggregate_stream)
+ results["aggregate_reports"], logs_client, conf.dcr_aggregate_stream
+ )
logger.info("Successfully pushed aggregate reports.")
if (
- results['forensic_reports'] and
- conf.dcr_forensic_stream and
- len(results['forensic_reports']) > 0 and
- save_forensic):
+ results["forensic_reports"]
+ and conf.dcr_forensic_stream
+ and len(results["forensic_reports"]) > 0
+ and save_forensic
+ ):
logger.info("Publishing forensic reports.")
self.publish_json(
- results['forensic_reports'],
- logs_client,
- conf.dcr_forensic_stream)
+ results["forensic_reports"], logs_client, conf.dcr_forensic_stream
+ )
logger.info("Successfully pushed forensic reports.")
if (
- results['smtp_tls_reports'] and
- conf.dcr_smtp_tls_stream and
- len(results['smtp_tls_reports']) > 0 and
- save_smtp_tls):
+ results["smtp_tls_reports"]
+ and conf.dcr_smtp_tls_stream
+ and len(results["smtp_tls_reports"]) > 0
+ and save_smtp_tls
+ ):
logger.info("Publishing SMTP TLS reports.")
self.publish_json(
- results['smtp_tls_reports'],
- logs_client,
- conf.dcr_smtp_tls_stream)
+ results["smtp_tls_reports"], logs_client, conf.dcr_smtp_tls_stream
+ )
logger.info("Successfully pushed SMTP TLS reports.")
diff --git a/parsedmarc/mail/__init__.py b/parsedmarc/mail/__init__.py
index 3256baa9..79939cc6 100644
--- a/parsedmarc/mail/__init__.py
+++ b/parsedmarc/mail/__init__.py
@@ -4,8 +4,10 @@
from parsedmarc.mail.imap import IMAPConnection
from parsedmarc.mail.maildir import MaildirConnection
-__all__ = ["MailboxConnection",
- "MSGraphConnection",
- "GmailConnection",
- "IMAPConnection",
- "MaildirConnection"]
+__all__ = [
+ "MailboxConnection",
+ "MSGraphConnection",
+ "GmailConnection",
+ "IMAPConnection",
+ "MaildirConnection",
+]
diff --git a/parsedmarc/mail/gmail.py b/parsedmarc/mail/gmail.py
index 436e1f02..b426746f 100644
--- a/parsedmarc/mail/gmail.py
+++ b/parsedmarc/mail/gmail.py
@@ -25,45 +25,47 @@ def _get_creds(token_file, credentials_file, scopes, oauth2_port):
if creds and creds.expired and creds.refresh_token:
creds.refresh(Request())
else:
- flow = InstalledAppFlow.from_client_secrets_file(
- credentials_file, scopes)
- creds = flow.run_local_server(open_browser=False,
- oauth2_port=oauth2_port)
+ flow = InstalledAppFlow.from_client_secrets_file(credentials_file, scopes)
+ creds = flow.run_local_server(open_browser=False, oauth2_port=oauth2_port)
# Save the credentials for the next run
- with Path(token_file).open('w') as token:
+ with Path(token_file).open("w") as token:
token.write(creds.to_json())
return creds
class GmailConnection(MailboxConnection):
- def __init__(self,
- token_file: str,
- credentials_file: str,
- scopes: List[str],
- include_spam_trash: bool,
- reports_folder: str,
- oauth2_port: int,
- paginate_messages: bool):
+ def __init__(
+ self,
+ token_file: str,
+ credentials_file: str,
+ scopes: List[str],
+ include_spam_trash: bool,
+ reports_folder: str,
+ oauth2_port: int,
+ paginate_messages: bool,
+ ):
creds = _get_creds(token_file, credentials_file, scopes, oauth2_port)
- self.service = build('gmail', 'v1', credentials=creds)
+ self.service = build("gmail", "v1", credentials=creds)
self.include_spam_trash = include_spam_trash
self.reports_label_id = self._find_label_id_for_label(reports_folder)
self.paginate_messages = paginate_messages
def create_folder(self, folder_name: str):
# Gmail doesn't support the name Archive
- if folder_name == 'Archive':
+ if folder_name == "Archive":
return
logger.debug(f"Creating label {folder_name}")
- request_body = {'name': folder_name, 'messageListVisibility': 'show'}
+ request_body = {"name": folder_name, "messageListVisibility": "show"}
try:
- self.service.users().labels()\
- .create(userId='me', body=request_body).execute()
+ self.service.users().labels().create(
+ userId="me", body=request_body
+ ).execute()
except HttpError as e:
if e.status_code == 409:
- logger.debug(f'Folder {folder_name} already exists, '
- f'skipping creation')
+ logger.debug(
+ f"Folder {folder_name} already exists, " f"skipping creation"
+ )
else:
raise e
@@ -93,44 +95,42 @@ def fetch_messages(self, reports_folder: str, **kwargs) -> List[str]:
return [id for id in self._fetch_all_message_ids(reports_label_id)]
def fetch_message(self, message_id):
- msg = self.service.users().messages()\
- .get(userId='me',
- id=message_id,
- format="raw"
- )\
+ msg = (
+ self.service.users()
+ .messages()
+ .get(userId="me", id=message_id, format="raw")
.execute()
- return urlsafe_b64decode(msg['raw'])
+ )
+ return urlsafe_b64decode(msg["raw"])
def delete_message(self, message_id: str):
- self.service.users().messages().delete(userId='me', id=message_id)
+ self.service.users().messages().delete(userId="me", id=message_id).execute()
def move_message(self, message_id: str, folder_name: str):
label_id = self._find_label_id_for_label(folder_name)
logger.debug(f"Moving message UID {message_id} to {folder_name}")
request_body = {
- 'addLabelIds': [label_id],
- 'removeLabelIds': [self.reports_label_id]
+ "addLabelIds": [label_id],
+ "removeLabelIds": [self.reports_label_id],
}
- self.service.users().messages()\
- .modify(userId='me',
- id=message_id,
- body=request_body)\
- .execute()
+ self.service.users().messages().modify(
+ userId="me", id=message_id, body=request_body
+ ).execute()
def keepalive(self):
# Not needed
pass
def watch(self, check_callback, check_timeout):
- """ Checks the mailbox for new messages every n seconds"""
+ """Checks the mailbox for new messages every n seconds"""
while True:
sleep(check_timeout)
check_callback(self)
@lru_cache(maxsize=10)
def _find_label_id_for_label(self, label_name: str) -> str:
- results = self.service.users().labels().list(userId='me').execute()
- labels = results.get('labels', [])
+ results = self.service.users().labels().list(userId="me").execute()
+ labels = results.get("labels", [])
for label in labels:
- if label_name == label['id'] or label_name == label['name']:
- return label['id']
+ if label_name == label["id"] or label_name == label["name"]:
+ return label["id"]
diff --git a/parsedmarc/mail/graph.py b/parsedmarc/mail/graph.py
index 2fc4364d..f022ca25 100644
--- a/parsedmarc/mail/graph.py
+++ b/parsedmarc/mail/graph.py
@@ -4,9 +4,13 @@
from time import sleep
from typing import List, Optional
-from azure.identity import UsernamePasswordCredential, \
- DeviceCodeCredential, ClientSecretCredential, \
- TokenCachePersistenceOptions, AuthenticationRecord
+from azure.identity import (
+ UsernamePasswordCredential,
+ DeviceCodeCredential,
+ ClientSecretCredential,
+ TokenCachePersistenceOptions,
+ AuthenticationRecord,
+)
from msgraph.core import GraphClient
from parsedmarc.log import logger
@@ -21,15 +25,15 @@ class AuthMethod(Enum):
def _get_cache_args(token_path: Path, allow_unencrypted_storage):
cache_args = {
- 'cache_persistence_options':
- TokenCachePersistenceOptions(
- name='parsedmarc',
- allow_unencrypted_storage=allow_unencrypted_storage)
+ "cache_persistence_options": TokenCachePersistenceOptions(
+ name="parsedmarc", allow_unencrypted_storage=allow_unencrypted_storage
+ )
}
auth_record = _load_token(token_path)
if auth_record:
- cache_args['authentication_record'] = \
- AuthenticationRecord.deserialize(auth_record)
+ cache_args["authentication_record"] = AuthenticationRecord.deserialize(
+ auth_record
+ )
return cache_args
@@ -42,53 +46,57 @@ def _load_token(token_path: Path) -> Optional[str]:
def _cache_auth_record(record: AuthenticationRecord, token_path: Path):
token = record.serialize()
- with token_path.open('w') as token_file:
+ with token_path.open("w") as token_file:
token_file.write(token)
def _generate_credential(auth_method: str, token_path: Path, **kwargs):
if auth_method == AuthMethod.DeviceCode.name:
credential = DeviceCodeCredential(
- client_id=kwargs['client_id'],
+ client_id=kwargs["client_id"],
disable_automatic_authentication=True,
- tenant_id=kwargs['tenant_id'],
+ tenant_id=kwargs["tenant_id"],
**_get_cache_args(
token_path,
- allow_unencrypted_storage=kwargs['allow_unencrypted_storage'])
+ allow_unencrypted_storage=kwargs["allow_unencrypted_storage"],
+ ),
)
elif auth_method == AuthMethod.UsernamePassword.name:
credential = UsernamePasswordCredential(
- client_id=kwargs['client_id'],
- client_credential=kwargs['client_secret'],
+ client_id=kwargs["client_id"],
+ client_credential=kwargs["client_secret"],
disable_automatic_authentication=True,
- username=kwargs['username'],
- password=kwargs['password'],
+ username=kwargs["username"],
+ password=kwargs["password"],
**_get_cache_args(
token_path,
- allow_unencrypted_storage=kwargs['allow_unencrypted_storage'])
+ allow_unencrypted_storage=kwargs["allow_unencrypted_storage"],
+ ),
)
elif auth_method == AuthMethod.ClientSecret.name:
credential = ClientSecretCredential(
- client_id=kwargs['client_id'],
- tenant_id=kwargs['tenant_id'],
- client_secret=kwargs['client_secret']
+ client_id=kwargs["client_id"],
+ tenant_id=kwargs["tenant_id"],
+ client_secret=kwargs["client_secret"],
)
else:
- raise RuntimeError(f'Auth method {auth_method} not found')
+ raise RuntimeError(f"Auth method {auth_method} not found")
return credential
class MSGraphConnection(MailboxConnection):
- def __init__(self,
- auth_method: str,
- mailbox: str,
- client_id: str,
- client_secret: str,
- username: str,
- password: str,
- tenant_id: str,
- token_file: str,
- allow_unencrypted_storage: bool):
+ def __init__(
+ self,
+ auth_method: str,
+ mailbox: str,
+ client_id: str,
+ client_secret: str,
+ username: str,
+ password: str,
+ tenant_id: str,
+ token_file: str,
+ allow_unencrypted_storage: bool,
+ ):
token_path = Path(token_file)
credential = _generate_credential(
auth_method,
@@ -98,156 +106,149 @@ def __init__(self,
password=password,
tenant_id=tenant_id,
token_path=token_path,
- allow_unencrypted_storage=allow_unencrypted_storage)
- client_params = {
- 'credential': credential
- }
+ allow_unencrypted_storage=allow_unencrypted_storage,
+ )
+ client_params = {"credential": credential}
if not isinstance(credential, ClientSecretCredential):
- scopes = ['Mail.ReadWrite']
+ scopes = ["Mail.ReadWrite"]
# Detect if mailbox is shared
if mailbox and username != mailbox:
- scopes = ['Mail.ReadWrite.Shared']
+ scopes = ["Mail.ReadWrite.Shared"]
auth_record = credential.authenticate(scopes=scopes)
_cache_auth_record(auth_record, token_path)
- client_params['scopes'] = scopes
+ client_params["scopes"] = scopes
self._client = GraphClient(**client_params)
self.mailbox_name = mailbox
def create_folder(self, folder_name: str):
- sub_url = ''
- path_parts = folder_name.split('/')
+ sub_url = ""
+ path_parts = folder_name.split("/")
if len(path_parts) > 1: # Folder is a subFolder
parent_folder_id = None
for folder in path_parts[:-1]:
parent_folder_id = self._find_folder_id_with_parent(
- folder, parent_folder_id)
- sub_url = f'/{parent_folder_id}/childFolders'
+ folder, parent_folder_id
+ )
+ sub_url = f"/{parent_folder_id}/childFolders"
folder_name = path_parts[-1]
- request_body = {
- 'displayName': folder_name
- }
- request_url = f'/users/{self.mailbox_name}/mailFolders{sub_url}'
+ request_body = {"displayName": folder_name}
+ request_url = f"/users/{self.mailbox_name}/mailFolders{sub_url}"
resp = self._client.post(request_url, json=request_body)
if resp.status_code == 409:
- logger.debug(f'Folder {folder_name} already exists, '
- f'skipping creation')
+ logger.debug(f"Folder {folder_name} already exists, " f"skipping creation")
elif resp.status_code == 201:
- logger.debug(f'Created folder {folder_name}')
+ logger.debug(f"Created folder {folder_name}")
else:
- logger.warning(f'Unknown response '
- f'{resp.status_code} {resp.json()}')
+ logger.warning(f"Unknown response " f"{resp.status_code} {resp.json()}")
def fetch_messages(self, folder_name: str, **kwargs) -> List[str]:
- """ Returns a list of message UIDs in the specified folder """
+ """Returns a list of message UIDs in the specified folder"""
folder_id = self._find_folder_id_from_folder_path(folder_name)
- url = f'/users/{self.mailbox_name}/mailFolders/' \
- f'{folder_id}/messages'
- batch_size = kwargs.get('batch_size')
+ url = f"/users/{self.mailbox_name}/mailFolders/" f"{folder_id}/messages"
+ batch_size = kwargs.get("batch_size")
if not batch_size:
batch_size = 0
emails = self._get_all_messages(url, batch_size)
- return [email['id'] for email in emails]
+ return [email["id"] for email in emails]
def _get_all_messages(self, url, batch_size):
messages: list
- params = {
- '$select': 'id'
- }
+ params = {"$select": "id"}
if batch_size and batch_size > 0:
- params['$top'] = batch_size
+ params["$top"] = batch_size
else:
- params['$top'] = 100
+ params["$top"] = 100
result = self._client.get(url, params=params)
if result.status_code != 200:
- raise RuntimeError(f'Failed to fetch messages {result.text}')
- messages = result.json()['value']
+ raise RuntimeError(f"Failed to fetch messages {result.text}")
+ messages = result.json()["value"]
# Loop if next page is present and not obtained message limit.
- while '@odata.nextLink' in result.json() and (
- batch_size == 0 or
- batch_size - len(messages) > 0):
- result = self._client.get(result.json()['@odata.nextLink'])
+ while "@odata.nextLink" in result.json() and (
+ batch_size == 0 or batch_size - len(messages) > 0
+ ):
+ result = self._client.get(result.json()["@odata.nextLink"])
if result.status_code != 200:
- raise RuntimeError(f'Failed to fetch messages {result.text}')
- messages.extend(result.json()['value'])
+ raise RuntimeError(f"Failed to fetch messages {result.text}")
+ messages.extend(result.json()["value"])
return messages
def mark_message_read(self, message_id: str):
"""Marks a message as read"""
- url = f'/users/{self.mailbox_name}/messages/{message_id}'
+ url = f"/users/{self.mailbox_name}/messages/{message_id}"
resp = self._client.patch(url, json={"isRead": "true"})
if resp.status_code != 200:
- raise RuntimeWarning(f"Failed to mark message read"
- f"{resp.status_code}: {resp.json()}")
+ raise RuntimeWarning(
+ f"Failed to mark message read" f"{resp.status_code}: {resp.json()}"
+ )
def fetch_message(self, message_id: str):
- url = f'/users/{self.mailbox_name}/messages/{message_id}/$value'
+ url = f"/users/{self.mailbox_name}/messages/{message_id}/$value"
result = self._client.get(url)
if result.status_code != 200:
- raise RuntimeWarning(f"Failed to fetch message"
- f"{result.status_code}: {result.json()}")
+ raise RuntimeWarning(
+ f"Failed to fetch message" f"{result.status_code}: {result.json()}"
+ )
self.mark_message_read(message_id)
return result.text
def delete_message(self, message_id: str):
- url = f'/users/{self.mailbox_name}/messages/{message_id}'
+ url = f"/users/{self.mailbox_name}/messages/{message_id}"
resp = self._client.delete(url)
if resp.status_code != 204:
- raise RuntimeWarning(f"Failed to delete message "
- f"{resp.status_code}: {resp.json()}")
+ raise RuntimeWarning(
+ f"Failed to delete message " f"{resp.status_code}: {resp.json()}"
+ )
def move_message(self, message_id: str, folder_name: str):
folder_id = self._find_folder_id_from_folder_path(folder_name)
- request_body = {
- 'destinationId': folder_id
- }
- url = f'/users/{self.mailbox_name}/messages/{message_id}/move'
+ request_body = {"destinationId": folder_id}
+ url = f"/users/{self.mailbox_name}/messages/{message_id}/move"
resp = self._client.post(url, json=request_body)
if resp.status_code != 201:
- raise RuntimeWarning(f"Failed to move message "
- f"{resp.status_code}: {resp.json()}")
+ raise RuntimeWarning(
+ f"Failed to move message " f"{resp.status_code}: {resp.json()}"
+ )
def keepalive(self):
# Not needed
pass
def watch(self, check_callback, check_timeout):
- """ Checks the mailbox for new messages every n seconds"""
+ """Checks the mailbox for new messages every n seconds"""
while True:
sleep(check_timeout)
check_callback(self)
@lru_cache(maxsize=10)
def _find_folder_id_from_folder_path(self, folder_name: str) -> str:
- path_parts = folder_name.split('/')
+ path_parts = folder_name.split("/")
parent_folder_id = None
if len(path_parts) > 1:
for folder in path_parts[:-1]:
- folder_id = self._find_folder_id_with_parent(
- folder, parent_folder_id)
+ folder_id = self._find_folder_id_with_parent(folder, parent_folder_id)
parent_folder_id = folder_id
- return self._find_folder_id_with_parent(
- path_parts[-1], parent_folder_id)
+ return self._find_folder_id_with_parent(path_parts[-1], parent_folder_id)
else:
return self._find_folder_id_with_parent(folder_name, None)
- def _find_folder_id_with_parent(self,
- folder_name: str,
- parent_folder_id: Optional[str]):
- sub_url = ''
+ def _find_folder_id_with_parent(
+ self, folder_name: str, parent_folder_id: Optional[str]
+ ):
+ sub_url = ""
if parent_folder_id is not None:
- sub_url = f'/{parent_folder_id}/childFolders'
- url = f'/users/{self.mailbox_name}/mailFolders{sub_url}'
+ sub_url = f"/{parent_folder_id}/childFolders"
+ url = f"/users/{self.mailbox_name}/mailFolders{sub_url}"
filter = f"?$filter=displayName eq '{folder_name}'"
folders_resp = self._client.get(url + filter)
if folders_resp.status_code != 200:
- raise RuntimeWarning(f"Failed to list folders."
- f"{folders_resp.json()}")
- folders: list = folders_resp.json()['value']
- matched_folders = [folder for folder in folders
- if folder['displayName'] == folder_name]
+ raise RuntimeWarning(f"Failed to list folders." f"{folders_resp.json()}")
+ folders: list = folders_resp.json()["value"]
+ matched_folders = [
+ folder for folder in folders if folder["displayName"] == folder_name
+ ]
if len(matched_folders) == 0:
raise RuntimeError(f"folder {folder_name} not found")
selected_folder = matched_folders[0]
- return selected_folder['id']
+ return selected_folder["id"]
diff --git a/parsedmarc/mail/imap.py b/parsedmarc/mail/imap.py
index 4ffa55fd..403bbeb7 100644
--- a/parsedmarc/mail/imap.py
+++ b/parsedmarc/mail/imap.py
@@ -9,30 +9,30 @@
class IMAPConnection(MailboxConnection):
- def __init__(self,
- host=None,
- user=None,
- password=None,
- port=None,
- ssl=True,
- verify=True,
- timeout=30,
- max_retries=4):
+ def __init__(
+ self,
+ host=None,
+ user=None,
+ password=None,
+ port=None,
+ ssl=True,
+ verify=True,
+ timeout=30,
+ max_retries=4,
+ ):
self._username = user
self._password = password
self._verify = verify
- self._client = IMAPClient(host, user, password, port=port,
- ssl=ssl, verify=verify,
- timeout=timeout,
- max_retries=max_retries)
-
- def get_folder_separator(self):
- try:
- namespaces = self._client.namespace()
- personal = namespaces.personal[0]
- return personal[1]
- except (IndexError, NameError):
- return '/'
+ self._client = IMAPClient(
+ host,
+ user,
+ password,
+ port=port,
+ ssl=ssl,
+ verify=verify,
+ timeout=timeout,
+ max_retries=max_retries,
+ )
def create_folder(self, folder_name: str):
self._client.create_folder(folder_name)
@@ -55,8 +55,8 @@ def keepalive(self):
def watch(self, check_callback, check_timeout):
"""
- Use an IDLE IMAP connection to parse incoming emails,
- and pass the results to a callback function
+ Use an IDLE IMAP connection to parse incoming emails,
+ and pass the results to a callback function
"""
# IDLE callback sends IMAPClient object,
@@ -67,18 +67,21 @@ def idle_callback_wrapper(client: IMAPClient):
while True:
try:
- IMAPClient(host=self._client.host,
- username=self._username,
- password=self._password,
- port=self._client.port,
- ssl=self._client.ssl,
- verify=self._verify,
- idle_callback=idle_callback_wrapper,
- idle_timeout=check_timeout)
+ IMAPClient(
+ host=self._client.host,
+ username=self._username,
+ password=self._password,
+ port=self._client.port,
+ ssl=self._client.ssl,
+ verify=self._verify,
+ idle_callback=idle_callback_wrapper,
+ idle_timeout=check_timeout,
+ )
except (timeout, IMAPClientError):
logger.warning("IMAP connection timeout. Reconnecting...")
sleep(check_timeout)
except Exception as e:
- logger.warning("IMAP connection error. {0}. "
- "Reconnecting...".format(e))
+ logger.warning(
+ "IMAP connection error. {0}. " "Reconnecting...".format(e)
+ )
sleep(check_timeout)
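# Usage sketch for the IMAP wrapper above, driven through the generic
# MailboxConnection interface; host and credentials are placeholders.
from parsedmarc.mail import IMAPConnection, MailboxConnection


def fetch_raw_reports(connection: MailboxConnection, reports_folder="INBOX"):
    for message_id in connection.fetch_messages(reports_folder):
        raw_message = connection.fetch_message(message_id)
        print("fetched {0} ({1} characters)".format(message_id, len(raw_message)))


def poll_inbox():
    imap = IMAPConnection(
        host="imap.example.com",
        user="dmarc@example.com",
        password="placeholder-password",
        ssl=True,
        verify=True,
        timeout=30,
        max_retries=4,
    )
    fetch_raw_reports(imap)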
diff --git a/parsedmarc/mail/mailbox_connection.py b/parsedmarc/mail/mailbox_connection.py
index ba7c2cf7..ef94b797 100644
--- a/parsedmarc/mail/mailbox_connection.py
+++ b/parsedmarc/mail/mailbox_connection.py
@@ -6,15 +6,11 @@ class MailboxConnection(ABC):
"""
Interface for a mailbox connection
"""
- def get_folder_separator(self):
- return "/"
def create_folder(self, folder_name: str):
raise NotImplementedError
- def fetch_messages(self,
- reports_folder: str,
- **kwargs) -> List[str]:
+ def fetch_messages(self, reports_folder: str, **kwargs) -> List[str]:
raise NotImplementedError
def fetch_message(self, message_id) -> str:
diff --git a/parsedmarc/mail/maildir.py b/parsedmarc/mail/maildir.py
index 17d3f54d..dd6481a8 100644
--- a/parsedmarc/mail/maildir.py
+++ b/parsedmarc/mail/maildir.py
@@ -7,28 +7,30 @@
class MaildirConnection(MailboxConnection):
- def __init__(self,
- maildir_path=None,
- maildir_create=False,
- ):
+ def __init__(
+ self,
+ maildir_path=None,
+ maildir_create=False,
+ ):
self._maildir_path = maildir_path
self._maildir_create = maildir_create
maildir_owner = os.stat(maildir_path).st_uid
if os.getuid() != maildir_owner:
if os.getuid() == 0:
- logger.warning("Switching uid to {} to access Maildir".format(
- maildir_owner))
+ logger.warning(
+ "Switching uid to {} to access Maildir".format(maildir_owner)
+ )
os.setuid(maildir_owner)
else:
- ex = 'runtime uid {} differ from maildir {} owner {}'.format(
- os.getuid(), maildir_path, maildir_owner)
+ ex = "runtime uid {} differ from maildir {} owner {}".format(
+ os.getuid(), maildir_path, maildir_owner
+ )
raise Exception(ex)
self._client = mailbox.Maildir(maildir_path, create=maildir_create)
self._subfolder_client = {}
def create_folder(self, folder_name: str):
- self._subfolder_client[folder_name] = self._client.add_folder(
- folder_name)
+ self._subfolder_client[folder_name] = self._client.add_folder(folder_name)
self._client.add_folder(folder_name)
def fetch_messages(self, reports_folder: str, **kwargs):
@@ -43,8 +45,9 @@ def delete_message(self, message_id: str):
def move_message(self, message_id: str, folder_name: str):
message_data = self._client.get(message_id)
if folder_name not in self._subfolder_client.keys():
- self._subfolder_client = mailbox.Maildir(os.join(
- self.maildir_path, folder_name), create=self.maildir_create)
+ self._subfolder_client[folder_name] = mailbox.Maildir(
+ os.path.join(self._maildir_path, folder_name), create=self._maildir_create
+ )
self._subfolder_client[folder_name].add(message_data)
self._client.remove(message_id)
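# Usage sketch for the Maildir wrapper above; the path is a placeholder and
# must be owned by the user running the process (see the uid check in __init__).
from parsedmarc.mail import MaildirConnection


def list_local_reports():
    maildir = MaildirConnection(maildir_path="/var/mail/dmarc", maildir_create=False)
    for message_id in maildir.fetch_messages("INBOX"):
        print("found report message {0}".format(message_id))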
diff --git a/parsedmarc/opensearch.py b/parsedmarc/opensearch.py
index f8a7b1e8..6bb41367 100644
--- a/parsedmarc/opensearch.py
+++ b/parsedmarc/opensearch.py
@@ -2,8 +2,21 @@
from collections import OrderedDict
-from opensearchpy import Q, connections, Object, Document, Index, Nested, \
- InnerDoc, Integer, Text, Boolean, Ip, Date, Search
+from opensearchpy import (
+ Q,
+ connections,
+ Object,
+ Document,
+ Index,
+ Nested,
+ InnerDoc,
+ Integer,
+ Text,
+ Boolean,
+ Ip,
+ Date,
+ Search,
+)
from opensearchpy.helpers import reindex
from parsedmarc.log import logger
@@ -75,24 +88,21 @@ class Index:
spf_results = Nested(_SPFResult)
def add_policy_override(self, type_, comment):
- self.policy_overrides.append(_PolicyOverride(type=type_,
- comment=comment))
+ self.policy_overrides.append(_PolicyOverride(type=type_, comment=comment))
def add_dkim_result(self, domain, selector, result):
- self.dkim_results.append(_DKIMResult(domain=domain,
- selector=selector,
- result=result))
+ self.dkim_results.append(
+ _DKIMResult(domain=domain, selector=selector, result=result)
+ )
def add_spf_result(self, domain, scope, result):
- self.spf_results.append(_SPFResult(domain=domain,
- scope=scope,
- result=result))
+ self.spf_results.append(_SPFResult(domain=domain, scope=scope, result=result))
- def save(self, ** kwargs):
+ def save(self, **kwargs):
self.passed_dmarc = False
self.passed_dmarc = self.spf_aligned or self.dkim_aligned
- return super().save(** kwargs)
+ return super().save(**kwargs)
class _EmailAddressDoc(InnerDoc):
@@ -122,24 +132,25 @@ class _ForensicSampleDoc(InnerDoc):
attachments = Nested(_EmailAttachmentDoc)
def add_to(self, display_name, address):
- self.to.append(_EmailAddressDoc(display_name=display_name,
- address=address))
+ self.to.append(_EmailAddressDoc(display_name=display_name, address=address))
def add_reply_to(self, display_name, address):
- self.reply_to.append(_EmailAddressDoc(display_name=display_name,
- address=address))
+ self.reply_to.append(
+ _EmailAddressDoc(display_name=display_name, address=address)
+ )
def add_cc(self, display_name, address):
- self.cc.append(_EmailAddressDoc(display_name=display_name,
- address=address))
+ self.cc.append(_EmailAddressDoc(display_name=display_name, address=address))
def add_bcc(self, display_name, address):
- self.bcc.append(_EmailAddressDoc(display_name=display_name,
- address=address))
+ self.bcc.append(_EmailAddressDoc(display_name=display_name, address=address))
def add_attachment(self, filename, content_type, sha256):
- self.attachments.append(_EmailAttachmentDoc(filename=filename,
- content_type=content_type, sha256=sha256))
+ self.attachments.append(
+ _EmailAttachmentDoc(
+ filename=filename, content_type=content_type, sha256=sha256
+ )
+ )
class _ForensicReportDoc(Document):
@@ -184,13 +195,17 @@ class _SMTPTLSPolicyDoc(InnerDoc):
failed_session_count = Integer()
failure_details = Nested(_SMTPTLSFailureDetailsDoc)
- def add_failure_details(self, result_type, ip_address,
- receiving_ip,
- receiving_mx_helo,
- failed_session_count,
- receiving_mx_hostname=None,
- additional_information_uri=None,
- failure_reason_code=None):
+ def add_failure_details(
+ self,
+ result_type,
+ ip_address,
+ receiving_ip,
+ receiving_mx_helo,
+ failed_session_count,
+ receiving_mx_hostname=None,
+ additional_information_uri=None,
+ failure_reason_code=None,
+ ):
self.failure_details.append(
result_type=result_type,
ip_address=ip_address,
@@ -199,12 +214,11 @@ def add_failure_details(self, result_type, ip_address,
receiving_ip=receiving_ip,
failed_session_count=failed_session_count,
additional_information=additional_information_uri,
- failure_reason_code=failure_reason_code
+ failure_reason_code=failure_reason_code,
)
class _SMTPTLSFailureReportDoc(Document):
-
class Index:
name = "smtp_tls"
@@ -216,27 +230,40 @@ class Index:
report_id = Text()
policies = Nested(_SMTPTLSPolicyDoc)
- def add_policy(self, policy_type, policy_domain,
- successful_session_count,
- failed_session_count,
- policy_string=None,
- mx_host_patterns=None,
- failure_details=None):
- self.policies.append(policy_type=policy_type,
- policy_domain=policy_domain,
- successful_session_count=successful_session_count,
- failed_session_count=failed_session_count,
- policy_string=policy_string,
- mx_host_patterns=mx_host_patterns,
- failure_details=failure_details)
+ def add_policy(
+ self,
+ policy_type,
+ policy_domain,
+ successful_session_count,
+ failed_session_count,
+ policy_string=None,
+ mx_host_patterns=None,
+ failure_details=None,
+ ):
+ _policy = _SMTPTLSPolicyDoc(
+ policy_type=policy_type,
+ policy_domain=policy_domain,
+ successful_session_count=successful_session_count,
+ failed_session_count=failed_session_count,
+ policy_string=policy_string,
+ mx_host_patterns=mx_host_patterns,
+ failure_details=failure_details,
+ )
+ self.policies.append(_policy)
class AlreadySaved(ValueError):
"""Raised when a report to be saved matches an existing report"""
-def set_hosts(hosts, use_ssl=False, ssl_cert_path=None,
- username=None, password=None, apiKey=None, timeout=60.0):
+def set_hosts(
+ hosts,
+ use_ssl=False,
+ ssl_cert_path=None,
+ username=None,
+ password=None,
+ apiKey=None,
+ timeout=60.0,
+):
"""
Sets the OpenSearch hosts to use
@@ -251,21 +278,18 @@ def set_hosts(hosts, use_ssl=False, ssl_cert_path=None,
"""
if not isinstance(hosts, list):
hosts = [hosts]
- conn_params = {
- "hosts": hosts,
- "timeout": timeout
- }
+ conn_params = {"hosts": hosts, "timeout": timeout}
if use_ssl:
- conn_params['use_ssl'] = True
+ conn_params["use_ssl"] = True
if ssl_cert_path:
- conn_params['verify_certs'] = True
- conn_params['ca_certs'] = ssl_cert_path
+ conn_params["verify_certs"] = True
+ conn_params["ca_certs"] = ssl_cert_path
else:
- conn_params['verify_certs'] = False
+ conn_params["verify_certs"] = False
if username:
- conn_params['http_auth'] = (username+":"+password)
+ conn_params["http_auth"] = username + ":" + password
if apiKey:
- conn_params['api_key'] = apiKey
+ conn_params["api_key"] = apiKey
connections.create_connection(**conn_params)
@@ -284,14 +308,12 @@ def create_indexes(names, settings=None):
if not index.exists():
logger.debug("Creating OpenSearch index: {0}".format(name))
if settings is None:
- index.settings(number_of_shards=1,
- number_of_replicas=0)
+ index.settings(number_of_shards=1, number_of_replicas=0)
else:
index.settings(**settings)
index.create()
except Exception as e:
- raise OpenSearchError(
- "OpenSearch error: {0}".format(e.__str__()))
+ raise OpenSearchError("OpenSearch error: {0}".format(e.__str__()))
def migrate_indexes(aggregate_indexes=None, forensic_indexes=None):
@@ -323,33 +345,31 @@ def migrate_indexes(aggregate_indexes=None, forensic_indexes=None):
fo_type = fo_mapping["type"]
if fo_type == "long":
new_index_name = "{0}-v{1}".format(aggregate_index_name, version)
- body = {"properties": {"published_policy.fo": {
- "type": "text",
- "fields": {
- "keyword": {
- "type": "keyword",
- "ignore_above": 256
+ body = {
+ "properties": {
+ "published_policy.fo": {
+ "type": "text",
+ "fields": {"keyword": {"type": "keyword", "ignore_above": 256}},
}
}
}
- }
- }
Index(new_index_name).create()
Index(new_index_name).put_mapping(doc_type=doc, body=body)
- reindex(connections.get_connection(), aggregate_index_name,
- new_index_name)
+ reindex(connections.get_connection(), aggregate_index_name, new_index_name)
Index(aggregate_index_name).delete()
for forensic_index in forensic_indexes:
pass
-def save_aggregate_report_to_opensearch(aggregate_report,
- index_suffix=None,
- index_prefix=None,
- monthly_indexes=False,
- number_of_shards=1,
- number_of_replicas=0):
+def save_aggregate_report_to_opensearch(
+ aggregate_report,
+ index_suffix=None,
+ index_prefix=None,
+ monthly_indexes=False,
+ number_of_shards=1,
+ number_of_replicas=0,
+):
"""
Saves a parsed DMARC aggregate report to OpenSearch
@@ -370,10 +390,8 @@ def save_aggregate_report_to_opensearch(aggregate_report,
org_name = metadata["org_name"]
report_id = metadata["report_id"]
domain = aggregate_report["policy_published"]["domain"]
- begin_date = human_timestamp_to_datetime(metadata["begin_date"],
- to_utc=True)
- end_date = human_timestamp_to_datetime(metadata["end_date"],
- to_utc=True)
+ begin_date = human_timestamp_to_datetime(metadata["begin_date"], to_utc=True)
+ end_date = human_timestamp_to_datetime(metadata["end_date"], to_utc=True)
begin_date_human = begin_date.strftime("%Y-%m-%d %H:%M:%SZ")
end_date_human = end_date.strftime("%Y-%m-%d %H:%M:%SZ")
if monthly_indexes:
@@ -382,8 +400,7 @@ def save_aggregate_report_to_opensearch(aggregate_report,
index_date = begin_date.strftime("%Y-%m-%d")
aggregate_report["begin_date"] = begin_date
aggregate_report["end_date"] = end_date
- date_range = [aggregate_report["begin_date"],
- aggregate_report["end_date"]]
+ date_range = [aggregate_report["begin_date"], aggregate_report["end_date"]]
org_name_query = Q(dict(match_phrase=dict(org_name=org_name)))
report_id_query = Q(dict(match_phrase=dict(report_id=report_id)))
@@ -405,18 +422,20 @@ def save_aggregate_report_to_opensearch(aggregate_report,
try:
existing = search.execute()
except Exception as error_:
- raise OpenSearchError("OpenSearch's search for existing report \
- error: {}".format(error_.__str__()))
+ raise OpenSearchError(
+ "OpenSearch's search for existing report \
+ error: {}".format(error_.__str__())
+ )
if len(existing) > 0:
- raise AlreadySaved("An aggregate report ID {0} from {1} about {2} "
- "with a date range of {3} UTC to {4} UTC already "
- "exists in "
- "OpenSearch".format(report_id,
- org_name,
- domain,
- begin_date_human,
- end_date_human))
+ raise AlreadySaved(
+ "An aggregate report ID {0} from {1} about {2} "
+ "with a date range of {3} UTC to {4} UTC already "
+ "exists in "
+ "OpenSearch".format(
+ report_id, org_name, domain, begin_date_human, end_date_human
+ )
+ )
published_policy = _PublishedPolicy(
domain=aggregate_report["policy_published"]["domain"],
adkim=aggregate_report["policy_published"]["adkim"],
@@ -424,7 +443,7 @@ def save_aggregate_report_to_opensearch(aggregate_report,
p=aggregate_report["policy_published"]["p"],
sp=aggregate_report["policy_published"]["sp"],
pct=aggregate_report["policy_published"]["pct"],
- fo=aggregate_report["policy_published"]["fo"]
+ fo=aggregate_report["policy_published"]["fo"],
)
for record in aggregate_report["records"]:
@@ -447,28 +466,33 @@ def save_aggregate_report_to_opensearch(aggregate_report,
source_name=record["source"]["name"],
message_count=record["count"],
disposition=record["policy_evaluated"]["disposition"],
- dkim_aligned=record["policy_evaluated"]["dkim"] is not None and
- record["policy_evaluated"]["dkim"].lower() == "pass",
- spf_aligned=record["policy_evaluated"]["spf"] is not None and
- record["policy_evaluated"]["spf"].lower() == "pass",
+ dkim_aligned=record["policy_evaluated"]["dkim"] is not None
+ and record["policy_evaluated"]["dkim"].lower() == "pass",
+ spf_aligned=record["policy_evaluated"]["spf"] is not None
+ and record["policy_evaluated"]["spf"].lower() == "pass",
header_from=record["identifiers"]["header_from"],
envelope_from=record["identifiers"]["envelope_from"],
- envelope_to=record["identifiers"]["envelope_to"]
+ envelope_to=record["identifiers"]["envelope_to"],
)
for override in record["policy_evaluated"]["policy_override_reasons"]:
- agg_doc.add_policy_override(type_=override["type"],
- comment=override["comment"])
+ agg_doc.add_policy_override(
+ type_=override["type"], comment=override["comment"]
+ )
for dkim_result in record["auth_results"]["dkim"]:
- agg_doc.add_dkim_result(domain=dkim_result["domain"],
- selector=dkim_result["selector"],
- result=dkim_result["result"])
+ agg_doc.add_dkim_result(
+ domain=dkim_result["domain"],
+ selector=dkim_result["selector"],
+ result=dkim_result["result"],
+ )
for spf_result in record["auth_results"]["spf"]:
- agg_doc.add_spf_result(domain=spf_result["domain"],
- scope=spf_result["scope"],
- result=spf_result["result"])
+ agg_doc.add_spf_result(
+ domain=spf_result["domain"],
+ scope=spf_result["scope"],
+ result=spf_result["result"],
+ )
index = "dmarc_aggregate"
if index_suffix:
@@ -476,41 +500,43 @@ def save_aggregate_report_to_opensearch(aggregate_report,
if index_prefix:
index = "{0}{1}".format(index_prefix, index)
index = "{0}-{1}".format(index, index_date)
- index_settings = dict(number_of_shards=number_of_shards,
- number_of_replicas=number_of_replicas)
+ index_settings = dict(
+ number_of_shards=number_of_shards, number_of_replicas=number_of_replicas
+ )
create_indexes([index], index_settings)
agg_doc.meta.index = index
try:
agg_doc.save()
except Exception as e:
- raise OpenSearchError(
- "OpenSearch error: {0}".format(e.__str__()))
+ raise OpenSearchError("OpenSearch error: {0}".format(e.__str__()))
-def save_forensic_report_to_opensearch(forensic_report,
- index_suffix=None,
- index_prefix=None,
- monthly_indexes=False,
- number_of_shards=1,
- number_of_replicas=0):
+def save_forensic_report_to_opensearch(
+ forensic_report,
+ index_suffix=None,
+ index_prefix=None,
+ monthly_indexes=False,
+ number_of_shards=1,
+ number_of_replicas=0,
+):
"""
- Saves a parsed DMARC forensic report to OpenSearch
-
- Args:
- forensic_report (OrderedDict): A parsed forensic report
- index_suffix (str): The suffix of the name of the index to save to
- index_prefix (str): The prefix of the name of the index to save to
- monthly_indexes (bool): Use monthly indexes instead of daily
- indexes
- number_of_shards (int): The number of shards to use in the index
- number_of_replicas (int): The number of replicas to use in the
- index
-
- Raises:
- AlreadySaved
+ Saves a parsed DMARC forensic report to OpenSearch
- """
+ Args:
+ forensic_report (OrderedDict): A parsed forensic report
+ index_suffix (str): The suffix of the name of the index to save to
+ index_prefix (str): The prefix of the name of the index to save to
+ monthly_indexes (bool): Use monthly indexes instead of daily
+ indexes
+ number_of_shards (int): The number of shards to use in the index
+ number_of_replicas (int): The number of replicas to use in the
+ index
+
+ Raises:
+ AlreadySaved
+
+ """
logger.info("Saving forensic report to OpenSearch")
forensic_report = forensic_report.copy()
sample_date = None
@@ -555,12 +581,12 @@ def save_forensic_report_to_opensearch(forensic_report,
existing = search.execute()
if len(existing) > 0:
- raise AlreadySaved("A forensic sample to {0} from {1} "
- "with a subject of {2} and arrival date of {3} "
- "already exists in "
- "OpenSearch".format(
- to_, from_, subject, arrival_date_human
- ))
+ raise AlreadySaved(
+ "A forensic sample to {0} from {1} "
+ "with a subject of {2} and arrival date of {3} "
+ "already exists in "
+ "OpenSearch".format(to_, from_, subject, arrival_date_human)
+ )
parsed_sample = forensic_report["parsed_sample"]
sample = _ForensicSampleDoc(
@@ -570,25 +596,25 @@ def save_forensic_report_to_opensearch(forensic_report,
date=sample_date,
subject=forensic_report["parsed_sample"]["subject"],
filename_safe_subject=parsed_sample["filename_safe_subject"],
- body=forensic_report["parsed_sample"]["body"]
+ body=forensic_report["parsed_sample"]["body"],
)
for address in forensic_report["parsed_sample"]["to"]:
- sample.add_to(display_name=address["display_name"],
- address=address["address"])
+ sample.add_to(display_name=address["display_name"], address=address["address"])
for address in forensic_report["parsed_sample"]["reply_to"]:
- sample.add_reply_to(display_name=address["display_name"],
- address=address["address"])
+ sample.add_reply_to(
+ display_name=address["display_name"], address=address["address"]
+ )
for address in forensic_report["parsed_sample"]["cc"]:
- sample.add_cc(display_name=address["display_name"],
- address=address["address"])
+ sample.add_cc(display_name=address["display_name"], address=address["address"])
for address in forensic_report["parsed_sample"]["bcc"]:
- sample.add_bcc(display_name=address["display_name"],
- address=address["address"])
+ sample.add_bcc(display_name=address["display_name"], address=address["address"])
for attachment in forensic_report["parsed_sample"]["attachments"]:
- sample.add_attachment(filename=attachment["filename"],
- content_type=attachment["mail_content_type"],
- sha256=attachment["sha256"])
+ sample.add_attachment(
+ filename=attachment["filename"],
+ content_type=attachment["mail_content_type"],
+ sha256=attachment["sha256"],
+ )
try:
forensic_doc = _ForensicReportDoc(
feedback_type=forensic_report["feedback_type"],
@@ -604,12 +630,11 @@ def save_forensic_report_to_opensearch(forensic_report,
source_country=forensic_report["source"]["country"],
source_reverse_dns=forensic_report["source"]["reverse_dns"],
source_base_domain=forensic_report["source"]["base_domain"],
- authentication_mechanisms=forensic_report[
- "authentication_mechanisms"],
+ authentication_mechanisms=forensic_report["authentication_mechanisms"],
auth_failure=forensic_report["auth_failure"],
dkim_domain=forensic_report["dkim_domain"],
original_rcpt_to=forensic_report["original_rcpt_to"],
- sample=sample
+ sample=sample,
)
index = "dmarc_forensic"
@@ -622,26 +647,29 @@ def save_forensic_report_to_opensearch(forensic_report,
else:
index_date = arrival_date.strftime("%Y-%m-%d")
index = "{0}-{1}".format(index, index_date)
- index_settings = dict(number_of_shards=number_of_shards,
- number_of_replicas=number_of_replicas)
+ index_settings = dict(
+ number_of_shards=number_of_shards, number_of_replicas=number_of_replicas
+ )
create_indexes([index], index_settings)
forensic_doc.meta.index = index
try:
forensic_doc.save()
except Exception as e:
- raise OpenSearchError(
- "OpenSearch error: {0}".format(e.__str__()))
+ raise OpenSearchError("OpenSearch error: {0}".format(e.__str__()))
except KeyError as e:
raise InvalidForensicReport(
- "Forensic report missing required field: {0}".format(e.__str__()))
+ "Forensic report missing required field: {0}".format(e.__str__())
+ )
-def save_smtp_tls_report_to_opensearch(report,
- index_suffix=None,
- index_prefix=None,
- monthly_indexes=False,
- number_of_shards=1,
- number_of_replicas=0):
+def save_smtp_tls_report_to_opensearch(
+ report,
+ index_suffix=None,
+ index_prefix=None,
+ monthly_indexes=False,
+ number_of_shards=1,
+ number_of_replicas=0,
+):
"""
Saves a parsed SMTP TLS report to OpenSearch
@@ -659,10 +687,8 @@ def save_smtp_tls_report_to_opensearch(report,
logger.info("Saving aggregate report to OpenSearch")
org_name = report["org_name"]
report_id = report["report_id"]
- begin_date = human_timestamp_to_datetime(report["begin_date"],
- to_utc=True)
- end_date = human_timestamp_to_datetime(report["end_date"],
- to_utc=True)
+ begin_date = human_timestamp_to_datetime(report["begin_date"], to_utc=True)
+ end_date = human_timestamp_to_datetime(report["end_date"], to_utc=True)
begin_date_human = begin_date.strftime("%Y-%m-%d %H:%M:%SZ")
end_date_human = end_date.strftime("%Y-%m-%d %H:%M:%SZ")
if monthly_indexes:
@@ -691,15 +717,19 @@ def save_smtp_tls_report_to_opensearch(report,
try:
existing = search.execute()
except Exception as error_:
- raise OpenSearchError("OpenSearch's search for existing report \
- error: {}".format(error_.__str__()))
+ raise OpenSearchError(
+ "OpenSearch's search for existing report \
+ error: {}".format(error_.__str__())
+ )
if len(existing) > 0:
- raise AlreadySaved(f"An SMTP TLS report ID {report_id} from "
- f" {org_name} with a date range of "
- f"{begin_date_human} UTC to "
- f"{end_date_human} UTC already "
- "exists in OpenSearch")
+ raise AlreadySaved(
+ f"An SMTP TLS report ID {report_id} from "
+ f" {org_name} with a date range of "
+ f"{begin_date_human} UTC to "
+ f"{end_date_human} UTC already "
+ "exists in OpenSearch"
+ )
index = "smtp_tls"
if index_suffix:
@@ -707,8 +737,9 @@ def save_smtp_tls_report_to_opensearch(report,
if index_prefix:
index = "{0}{1}".format(index_prefix, index)
index = "{0}-{1}".format(index, index_date)
- index_settings = dict(number_of_shards=number_of_shards,
- number_of_replicas=number_of_replicas)
+ index_settings = dict(
+ number_of_shards=number_of_shards, number_of_replicas=number_of_replicas
+ )
smtp_tls_doc = _SMTPTLSFailureReportDoc(
organization_name=report["organization_name"],
@@ -716,10 +747,10 @@ def save_smtp_tls_report_to_opensearch(report,
date_begin=report["date_begin"],
date_end=report["date_end"],
contact_info=report["contact_info"],
- report_id=report["report_id"]
+ report_id=report["report_id"],
)
- for policy in report['policies']:
+ for policy in report["policies"]:
policy_strings = None
mx_host_patterns = None
if "policy_strings" in policy:
@@ -730,7 +761,7 @@ def save_smtp_tls_report_to_opensearch(report,
policy_domain=policy["policy_domain"],
policy_type=policy["policy_type"],
policy_string=policy_strings,
- mx_host_patterns=mx_host_patterns
+ mx_host_patterns=mx_host_patterns,
)
if "failure_details" in policy:
failure_details = policy["failure_details"]
@@ -738,11 +769,11 @@ def save_smtp_tls_report_to_opensearch(report,
additional_information_uri = None
failure_reason_code = None
if "receiving_mx_hostname" in failure_details:
- receiving_mx_hostname = failure_details[
- "receiving_mx_hostname"]
+ receiving_mx_hostname = failure_details["receiving_mx_hostname"]
if "additional_information_uri" in failure_details:
additional_information_uri = failure_details[
- "additional_information_uri"]
+ "additional_information_uri"
+ ]
if "failure_reason_code" in failure_details:
failure_reason_code = failure_details["failure_reason_code"]
policy_doc.add_failure_details(
@@ -753,7 +784,7 @@ def save_smtp_tls_report_to_opensearch(report,
failed_session_count=failure_details["failed_session_count"],
receiving_mx_hostname=receiving_mx_hostname,
additional_information_uri=additional_information_uri,
- failure_reason_code=failure_reason_code
+ failure_reason_code=failure_reason_code,
)
smtp_tls_doc.policies.append(policy_doc)
@@ -763,5 +794,4 @@ def save_smtp_tls_report_to_opensearch(report,
try:
smtp_tls_doc.save()
except Exception as e:
- raise OpenSearchError(
- "OpenSearch error: {0}".format(e.__str__()))
+ raise OpenSearchError("OpenSearch error: {0}".format(e.__str__()))
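
A minimal usage sketch of the OpenSearch helpers reformatted above, with hypothetical host and credential values (the `parsedmarc.opensearch` module path and a reachable cluster are assumptions, not shown in this hunk):

    from parsedmarc import opensearch

    # Point the module-level connection at a cluster before saving any reports.
    opensearch.set_hosts(
        ["https://opensearch.example.com:9200"],  # hypothetical host
        use_ssl=True,
        username="parsedmarc",                    # hypothetical credentials
        password="changeme",
        timeout=60.0,
    )

    # create_indexes(names, settings=None) is the same helper the save_* functions
    # call internally; the settings dict mirrors the index_settings built above.
    opensearch.create_indexes(
        ["dmarc_aggregate-2024-01-01"],
        {"number_of_shards": 1, "number_of_replicas": 0},
    )

From there, save_aggregate_report_to_opensearch() and its forensic and SMTP TLS counterparts build the daily or monthly index name, create it if needed, and raise AlreadySaved when a matching report is already indexed.
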
diff --git a/parsedmarc/s3.py b/parsedmarc/s3.py
index d7060467..1b6c3743 100644
--- a/parsedmarc/s3.py
+++ b/parsedmarc/s3.py
@@ -10,8 +10,15 @@
class S3Client(object):
"""A client for a Amazon S3"""
- def __init__(self, bucket_name, bucket_path, region_name, endpoint_url,
- access_key_id, secret_access_key):
+ def __init__(
+ self,
+ bucket_name,
+ bucket_path,
+ region_name,
+ endpoint_url,
+ access_key_id,
+ secret_access_key,
+ ):
"""
Initializes the S3Client
Args:
@@ -34,7 +41,7 @@ def __init__(self, bucket_name, bucket_path, region_name, endpoint_url,
# https://github.com/boto/boto3/blob/1.24.7/boto3/session.py#L312
self.s3 = boto3.resource(
- 's3',
+ "s3",
region_name=region_name,
endpoint_url=endpoint_url,
aws_access_key_id=access_key_id,
@@ -43,10 +50,10 @@ def __init__(self, bucket_name, bucket_path, region_name, endpoint_url,
self.bucket = self.s3.Bucket(self.bucket_name)
def save_aggregate_report_to_s3(self, report):
- self.save_report_to_s3(report, 'aggregate')
+ self.save_report_to_s3(report, "aggregate")
def save_forensic_report_to_s3(self, report):
- self.save_report_to_s3(report, 'forensic')
+ self.save_report_to_s3(report, "forensic")
def save_smtp_tls_report_to_s3(self, report):
self.save_report_to_s3(report, "smtp_tls")
@@ -67,19 +74,18 @@ def save_report_to_s3(self, report, report_type):
report_date.year,
report_date.month,
report_date.day,
- report_id
+ report_id,
+ )
+ logger.debug(
+ "Saving {0} report to s3://{1}/{2}".format(
+ report_type, self.bucket_name, object_path
+ )
)
- logger.debug("Saving {0} report to s3://{1}/{2}".format(
- report_type,
- self.bucket_name,
- object_path))
object_metadata = {
k: v
for k, v in report["report_metadata"].items()
if k in self.metadata_keys
}
self.bucket.put_object(
- Body=json.dumps(report),
- Key=object_path,
- Metadata=object_metadata
+ Body=json.dumps(report), Key=object_path, Metadata=object_metadata
)
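
A usage sketch of the S3Client constructor reformatted above, with hypothetical bucket and credential values; each save_*_report_to_s3() wrapper delegates to save_report_to_s3() with the matching report type string:

    from parsedmarc.s3 import S3Client

    s3_client = S3Client(
        bucket_name="dmarc-reports",        # hypothetical bucket
        bucket_path="parsedmarc",           # key prefix within the bucket
        region_name="us-east-1",
        endpoint_url=None,                  # or an S3-compatible endpoint URL
        access_key_id="EXAMPLEKEYID",       # hypothetical credentials
        secret_access_key="examplesecret",
    )
    # `aggregate_report` would be a parsed report dict produced by parsedmarc.
    # The object body is json.dumps(report), and selected report_metadata keys
    # are attached as S3 object metadata, as the put_object() hunk above shows.
    # s3_client.save_aggregate_report_to_s3(aggregate_report)
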
diff --git a/parsedmarc/splunk.py b/parsedmarc/splunk.py
index 9d94feb0..cf6a1e04 100644
--- a/parsedmarc/splunk.py
+++ b/parsedmarc/splunk.py
@@ -22,8 +22,9 @@ class HECClient(object):
# http://docs.splunk.com/Documentation/Splunk/latest/Data/AboutHEC
# http://docs.splunk.com/Documentation/Splunk/latest/RESTREF/RESTinput#services.2Fcollector
- def __init__(self, url, access_token, index,
- source="parsedmarc", verify=True, timeout=60):
+ def __init__(
+ self, url, access_token, index, source="parsedmarc", verify=True, timeout=60
+ ):
"""
Initializes the HECClient
@@ -37,8 +38,9 @@ def __init__(self, url, access_token, index,
data before giving up
"""
url = urlparse(url)
- self.url = "{0}://{1}/services/collector/event/1.0".format(url.scheme,
- url.netloc)
+ self.url = "{0}://{1}/services/collector/event/1.0".format(
+ url.scheme, url.netloc
+ )
self.access_token = access_token.lstrip("Splunk ")
self.index = index
self.host = socket.getfqdn()
@@ -46,12 +48,11 @@ def __init__(self, url, access_token, index,
self.session = requests.Session()
self.timeout = timeout
self.session.verify = verify
- self._common_data = dict(host=self.host, source=self.source,
- index=self.index)
+ self._common_data = dict(host=self.host, source=self.source, index=self.index)
self.session.headers = {
"User-Agent": "parsedmarc/{0}".format(__version__),
- "Authorization": "Splunk {0}".format(self.access_token)
+ "Authorization": "Splunk {0}".format(self.access_token),
}
def save_aggregate_reports_to_splunk(self, aggregate_reports):
@@ -78,36 +79,26 @@ def save_aggregate_reports_to_splunk(self, aggregate_reports):
for metadata in report["report_metadata"]:
new_report[metadata] = report["report_metadata"][metadata]
new_report["published_policy"] = report["policy_published"]
- new_report["source_ip_address"] = record["source"][
- "ip_address"]
+ new_report["source_ip_address"] = record["source"]["ip_address"]
new_report["source_country"] = record["source"]["country"]
- new_report["source_reverse_dns"] = record["source"][
- "reverse_dns"]
- new_report["source_base_domain"] = record["source"][
- "base_domain"]
+ new_report["source_reverse_dns"] = record["source"]["reverse_dns"]
+ new_report["source_base_domain"] = record["source"]["base_domain"]
new_report["source_type"] = record["source"]["type"]
new_report["source_name"] = record["source"]["name"]
new_report["message_count"] = record["count"]
- new_report["disposition"] = record["policy_evaluated"][
- "disposition"
- ]
+ new_report["disposition"] = record["policy_evaluated"]["disposition"]
new_report["spf_aligned"] = record["alignment"]["spf"]
new_report["dkim_aligned"] = record["alignment"]["dkim"]
new_report["passed_dmarc"] = record["alignment"]["dmarc"]
- new_report["header_from"] = record["identifiers"][
- "header_from"]
- new_report["envelope_from"] = record["identifiers"][
- "envelope_from"]
+ new_report["header_from"] = record["identifiers"]["header_from"]
+ new_report["envelope_from"] = record["identifiers"]["envelope_from"]
if "dkim" in record["auth_results"]:
- new_report["dkim_results"] = record["auth_results"][
- "dkim"]
+ new_report["dkim_results"] = record["auth_results"]["dkim"]
if "spf" in record["auth_results"]:
- new_report["spf_results"] = record["auth_results"][
- "spf"]
+ new_report["spf_results"] = record["auth_results"]["spf"]
data["sourcetype"] = "dmarc:aggregate"
- timestamp = human_timestamp_to_unix_timestamp(
- new_report["begin_date"])
+ timestamp = human_timestamp_to_unix_timestamp(new_report["begin_date"])
data["time"] = timestamp
data["event"] = new_report.copy()
json_str += "{0}\n".format(json.dumps(data))
@@ -115,8 +106,7 @@ def save_aggregate_reports_to_splunk(self, aggregate_reports):
if not self.session.verify:
logger.debug("Skipping certificate verification for Splunk HEC")
try:
- response = self.session.post(self.url, data=json_str,
- timeout=self.timeout)
+ response = self.session.post(self.url, data=json_str, timeout=self.timeout)
response = response.json()
except Exception as e:
raise SplunkError(e.__str__())
@@ -142,8 +132,7 @@ def save_forensic_reports_to_splunk(self, forensic_reports):
for report in forensic_reports:
data = self._common_data.copy()
data["sourcetype"] = "dmarc:forensic"
- timestamp = human_timestamp_to_unix_timestamp(
- report["arrival_date_utc"])
+ timestamp = human_timestamp_to_unix_timestamp(report["arrival_date_utc"])
data["time"] = timestamp
data["event"] = report.copy()
json_str += "{0}\n".format(json.dumps(data))
@@ -151,8 +140,7 @@ def save_forensic_reports_to_splunk(self, forensic_reports):
if not self.session.verify:
logger.debug("Skipping certificate verification for Splunk HEC")
try:
- response = self.session.post(self.url, data=json_str,
- timeout=self.timeout)
+ response = self.session.post(self.url, data=json_str, timeout=self.timeout)
response = response.json()
except Exception as e:
raise SplunkError(e.__str__())
@@ -179,8 +167,7 @@ def save_smtp_tls_reports_to_splunk(self, reports):
json_str = ""
for report in reports:
data["sourcetype"] = "smtp:tls"
- timestamp = human_timestamp_to_unix_timestamp(
- report["begin_date"])
+ timestamp = human_timestamp_to_unix_timestamp(report["begin_date"])
data["time"] = timestamp
data["event"] = report.copy()
json_str += "{0}\n".format(json.dumps(data))
@@ -188,8 +175,7 @@ def save_smtp_tls_reports_to_splunk(self, reports):
if not self.session.verify:
logger.debug("Skipping certificate verification for Splunk HEC")
try:
- response = self.session.post(self.url, data=json_str,
- timeout=self.timeout)
+ response = self.session.post(self.url, data=json_str, timeout=self.timeout)
response = response.json()
except Exception as e:
raise SplunkError(e.__str__())
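
The Splunk HEC client reformatted above batches events as newline-delimited JSON and sends them in a single POST to <scheme>://<host>/services/collector/event/1.0 with an "Authorization: Splunk <token>" header. A self-contained sketch of that payload shape, with hypothetical event fields:

    import json

    # Common fields shared by every event, as in self._common_data.
    common_data = {"host": "parser.example.com", "source": "parsedmarc", "index": "dmarc"}
    events = [
        {"org_name": "example.net", "report_id": "abc123", "message_count": 4},
        {"org_name": "example.org", "report_id": "def456", "message_count": 1},
    ]

    json_str = ""
    for event in events:
        data = common_data.copy()
        data["sourcetype"] = "dmarc:aggregate"
        data["time"] = 1700000000  # Unix timestamp, as human_timestamp_to_unix_timestamp() returns
        data["event"] = event
        json_str += "{0}\n".format(json.dumps(data))

    print(json_str)  # one JSON object per line, ready for a single HEC POST
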
diff --git a/parsedmarc/syslog.py b/parsedmarc/syslog.py
index 0fc47002..c656aa8e 100644
--- a/parsedmarc/syslog.py
+++ b/parsedmarc/syslog.py
@@ -4,8 +4,11 @@
import logging.handlers
import json
-from parsedmarc import parsed_aggregate_reports_to_csv_rows, \
- parsed_forensic_reports_to_csv_rows, parsed_smtp_tls_reports_to_csv_rows
+from parsedmarc import (
+ parsed_aggregate_reports_to_csv_rows,
+ parsed_forensic_reports_to_csv_rows,
+ parsed_smtp_tls_reports_to_csv_rows,
+)
class SyslogClient(object):
@@ -20,10 +23,9 @@ def __init__(self, server_name, server_port):
"""
self.server_name = server_name
self.server_port = server_port
- self.logger = logging.getLogger('parsedmarc_syslog')
+ self.logger = logging.getLogger("parsedmarc_syslog")
self.logger.setLevel(logging.INFO)
- log_handler = logging.handlers.SysLogHandler(address=(server_name,
- server_port))
+ log_handler = logging.handlers.SysLogHandler(address=(server_name, server_port))
self.logger.addHandler(log_handler)
def save_aggregate_report_to_syslog(self, aggregate_reports):
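
A minimal sketch of the logging pattern SyslogClient keeps after the reformat: a dedicated logger wired to a SysLogHandler for a (host, port) pair. The address below is hypothetical, and UDP syslog to localhost is simply dropped if nothing is listening:

    import logging
    import logging.handlers

    syslog_logger = logging.getLogger("parsedmarc_syslog")
    syslog_logger.setLevel(logging.INFO)
    handler = logging.handlers.SysLogHandler(address=("localhost", 514))
    syslog_logger.addHandler(handler)

    # The real client logs each parsed report as CSV rows produced by
    # parsed_aggregate_reports_to_csv_rows() and the related helpers imported above.
    syslog_logger.info("org_name,report_id,...")  # hypothetical row
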
diff --git a/parsedmarc/utils.py b/parsedmarc/utils.py
index 994fbb0d..37136ff7 100644
--- a/parsedmarc/utils.py
+++ b/parsedmarc/utils.py
@@ -38,7 +38,7 @@
import parsedmarc.resources.maps
-parenthesis_regex = re.compile(r'\s*\(.*\)\s*')
+parenthesis_regex = re.compile(r"\s*\(.*\)\s*")
null_file = open(os.devnull, "w")
mailparser_logger = logging.getLogger("mailparser")
@@ -67,7 +67,7 @@ def decode_base64(data):
data = bytes(data, encoding="ascii")
missing_padding = len(data) % 4
if missing_padding != 0:
- data += b'=' * (4 - missing_padding)
+ data += b"=" * (4 - missing_padding)
return base64.b64decode(data)
@@ -116,24 +116,35 @@ def query_dns(domain, record_type, cache=None, nameservers=None, timeout=2.0):
resolver = dns.resolver.Resolver()
timeout = float(timeout)
if nameservers is None:
- nameservers = ["1.1.1.1", "1.0.0.1",
- "2606:4700:4700::1111", "2606:4700:4700::1001",
- ]
+ nameservers = [
+ "1.1.1.1",
+ "1.0.0.1",
+ "2606:4700:4700::1111",
+ "2606:4700:4700::1001",
+ ]
resolver.nameservers = nameservers
resolver.timeout = timeout
resolver.lifetime = timeout
if record_type == "TXT":
- resource_records = list(map(
- lambda r: r.strings,
- resolver.resolve(domain, record_type, lifetime=timeout)))
+ resource_records = list(
+ map(
+ lambda r: r.strings,
+ resolver.resolve(domain, record_type, lifetime=timeout),
+ )
+ )
_resource_record = [
resource_record[0][:0].join(resource_record)
- for resource_record in resource_records if resource_record]
+ for resource_record in resource_records
+ if resource_record
+ ]
records = [r.decode() for r in _resource_record]
else:
- records = list(map(
- lambda r: r.to_text().replace('"', '').rstrip("."),
- resolver.resolve(domain, record_type, lifetime=timeout)))
+ records = list(
+ map(
+ lambda r: r.to_text().replace('"', "").rstrip("."),
+ resolver.resolve(domain, record_type, lifetime=timeout),
+ )
+ )
if cache:
cache[cache_key] = records
@@ -157,9 +168,9 @@ def get_reverse_dns(ip_address, cache=None, nameservers=None, timeout=2.0):
hostname = None
try:
address = dns.reversename.from_address(ip_address)
- hostname = query_dns(address, "PTR", cache=cache,
- nameservers=nameservers,
- timeout=timeout)[0]
+ hostname = query_dns(
+ address, "PTR", cache=cache, nameservers=nameservers, timeout=timeout
+ )[0]
except dns.exception.DNSException as e:
logger.warning(f"get_reverse_dns({ip_address}) exception: {e}")
@@ -256,9 +267,11 @@ def get_ip_address_country(ip_address, db_path=None):
if db_path is not None:
if os.path.isfile(db_path) is False:
db_path = None
- logger.warning(f"No file exists at {db_path}. Falling back to an "
- "included copy of the IPDB IP to Country "
- "Lite database.")
+ logger.warning(
+ f"No file exists at {db_path}. Falling back to an "
+ "included copy of the IPDB IP to Country "
+ "Lite database."
+ )
if db_path is None:
for system_path in db_paths:
@@ -267,12 +280,12 @@ def get_ip_address_country(ip_address, db_path=None):
break
if db_path is None:
- with pkg_resources.path(parsedmarc.resources.dbip,
- "dbip-country-lite.mmdb") as path:
+ with pkg_resources.path(
+ parsedmarc.resources.dbip, "dbip-country-lite.mmdb"
+ ) as path:
db_path = path
- db_age = datetime.now() - datetime.fromtimestamp(
- os.stat(db_path).st_mtime)
+ db_age = datetime.now() - datetime.fromtimestamp(os.stat(db_path).st_mtime)
if db_age > timedelta(days=30):
logger.warning("IP database is more than a month old")
@@ -288,12 +301,14 @@ def get_ip_address_country(ip_address, db_path=None):
return country
-def get_service_from_reverse_dns_base_domain(base_domain,
- always_use_local_file=False,
- local_file_path=None,
- url=None,
- offline=False,
- reverse_dns_map=None):
+def get_service_from_reverse_dns_base_domain(
+ base_domain,
+ always_use_local_file=False,
+ local_file_path=None,
+ url=None,
+ offline=False,
+ reverse_dns_map=None,
+):
"""
Returns the service name of a given base domain name from reverse DNS.
@@ -309,28 +324,27 @@ def get_service_from_reverse_dns_base_domain(base_domain,
If the service is unknown, the name will be
the supplied reverse_dns_base_domain and the type will be None
"""
+
def load_csv(_csv_file):
reader = csv.DictReader(_csv_file)
for row in reader:
key = row["base_reverse_dns"].lower().strip()
- reverse_dns_map[key] = dict(
- name=row["name"],
- type=row["type"])
+ reverse_dns_map[key] = dict(name=row["name"], type=row["type"])
base_domain = base_domain.lower().strip()
if url is None:
- url = ("https://raw.githubusercontent.com/domainaware"
- "/parsedmarc/master/parsedmarc/"
- "resources/maps/base_reverse_dns_map.csv")
+ url = (
+ "https://raw.githubusercontent.com/domainaware"
+ "/parsedmarc/master/parsedmarc/"
+ "resources/maps/base_reverse_dns_map.csv"
+ )
if reverse_dns_map is None:
reverse_dns_map = dict()
csv_file = io.StringIO()
- if (not (offline or always_use_local_file)
- and len(reverse_dns_map) == 0):
+ if not (offline or always_use_local_file) and len(reverse_dns_map) == 0:
try:
- logger.debug(f"Trying to fetch "
- f"reverse DNS map from {url}...")
+ logger.debug(f"Trying to fetch " f"reverse DNS map from {url}...")
csv_file.write(requests.get(url).text)
csv_file.seek(0)
load_csv(csv_file)
@@ -338,8 +352,9 @@ def load_csv(_csv_file):
logger.warning(f"Failed to fetch reverse DNS map: {e}")
if len(reverse_dns_map) == 0:
logger.info("Loading included reverse DNS map...")
- with pkg_resources.path(parsedmarc.resources.maps,
- "base_reverse_dns_map.csv") as path:
+ with pkg_resources.path(
+ parsedmarc.resources.maps, "base_reverse_dns_map.csv"
+ ) as path:
if local_file_path is not None:
path = local_file_path
with open(path) as csv_file:
@@ -352,15 +367,18 @@ def load_csv(_csv_file):
return service
-def get_ip_address_info(ip_address,
- ip_db_path=None,
- reverse_dns_map_path=None,
- always_use_local_files=False,
- reverse_dns_map_url=None,
- cache=None,
- reverse_dns_map=None,
- offline=False,
- nameservers=None, timeout=2.0):
+def get_ip_address_info(
+ ip_address,
+ ip_db_path=None,
+ reverse_dns_map_path=None,
+ always_use_local_files=False,
+ reverse_dns_map_url=None,
+ cache=None,
+ reverse_dns_map=None,
+ offline=False,
+ nameservers=None,
+ timeout=2.0,
+):
"""
Returns reverse DNS and country information for the given IP address
@@ -392,9 +410,9 @@ def get_ip_address_info(ip_address,
if offline:
reverse_dns = None
else:
- reverse_dns = get_reverse_dns(ip_address,
- nameservers=nameservers,
- timeout=timeout)
+ reverse_dns = get_reverse_dns(
+ ip_address, nameservers=nameservers, timeout=timeout
+ )
country = get_ip_address_country(ip_address, db_path=ip_db_path)
info["country"] = country
info["reverse_dns"] = reverse_dns
@@ -410,7 +428,8 @@ def get_ip_address_info(ip_address,
local_file_path=reverse_dns_map_path,
url=reverse_dns_map_url,
always_use_local_file=always_use_local_files,
- reverse_dns_map=reverse_dns_map)
+ reverse_dns_map=reverse_dns_map,
+ )
info["base_domain"] = base_domain
info["type"] = service["type"]
info["name"] = service["name"]
@@ -437,10 +456,14 @@ def parse_email_address(original_address):
local = address_parts[0].lower()
domain = address_parts[-1].lower()
- return OrderedDict([("display_name", display_name),
- ("address", address),
- ("local", local),
- ("domain", domain)])
+ return OrderedDict(
+ [
+ ("display_name", display_name),
+ ("address", address),
+ ("local", local),
+ ("domain", domain),
+ ]
+ )
def get_filename_safe_string(string):
@@ -453,8 +476,7 @@ def get_filename_safe_string(string):
Returns:
str: A string safe for a filename
"""
- invalid_filename_chars = ['\\', '/', ':', '"', '*', '?', '|', '\n',
- '\r']
+ invalid_filename_chars = ["\\", "/", ":", '"', "*", "?", "|", "\n", "\r"]
if string is None:
string = "None"
for char in invalid_filename_chars:
@@ -498,7 +520,8 @@ def is_outlook_msg(content):
bool: A flag that indicates if the file is an Outlook MSG file
"""
return isinstance(content, bytes) and content.startswith(
- b"\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1")
+ b"\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1"
+ )
def convert_outlook_msg(msg_bytes):
@@ -520,14 +543,16 @@ def convert_outlook_msg(msg_bytes):
with open("sample.msg", "wb") as msg_file:
msg_file.write(msg_bytes)
try:
- subprocess.check_call(["msgconvert", "sample.msg"],
- stdout=null_file, stderr=null_file)
+ subprocess.check_call(
+ ["msgconvert", "sample.msg"], stdout=null_file, stderr=null_file
+ )
eml_path = "sample.eml"
with open(eml_path, "rb") as eml_file:
rfc822 = eml_file.read()
except FileNotFoundError:
raise EmailParserError(
- "Failed to convert Outlook MSG: msgconvert utility not found")
+ "Failed to convert Outlook MSG: msgconvert utility not found"
+ )
finally:
os.chdir(orig_dir)
shutil.rmtree(tmp_dir)
@@ -562,8 +587,7 @@ def parse_email(data, strip_attachment_payloads=False):
if received["date_utc"] is None:
del received["date_utc"]
else:
- received["date_utc"] = received["date_utc"].replace("T",
- " ")
+ received["date_utc"] = received["date_utc"].replace("T", " ")
if "from" not in parsed_email:
if "From" in parsed_email["headers"]:
@@ -579,33 +603,36 @@ def parse_email(data, strip_attachment_payloads=False):
else:
parsed_email["date"] = None
if "reply_to" in parsed_email:
- parsed_email["reply_to"] = list(map(lambda x: parse_email_address(x),
- parsed_email["reply_to"]))
+ parsed_email["reply_to"] = list(
+ map(lambda x: parse_email_address(x), parsed_email["reply_to"])
+ )
else:
parsed_email["reply_to"] = []
if "to" in parsed_email:
- parsed_email["to"] = list(map(lambda x: parse_email_address(x),
- parsed_email["to"]))
+ parsed_email["to"] = list(
+ map(lambda x: parse_email_address(x), parsed_email["to"])
+ )
else:
parsed_email["to"] = []
if "cc" in parsed_email:
- parsed_email["cc"] = list(map(lambda x: parse_email_address(x),
- parsed_email["cc"]))
+ parsed_email["cc"] = list(
+ map(lambda x: parse_email_address(x), parsed_email["cc"])
+ )
else:
parsed_email["cc"] = []
if "bcc" in parsed_email:
- parsed_email["bcc"] = list(map(lambda x: parse_email_address(x),
- parsed_email["bcc"]))
+ parsed_email["bcc"] = list(
+ map(lambda x: parse_email_address(x), parsed_email["bcc"])
+ )
else:
parsed_email["bcc"] = []
if "delivered_to" in parsed_email:
parsed_email["delivered_to"] = list(
- map(lambda x: parse_email_address(x),
- parsed_email["delivered_to"])
+ map(lambda x: parse_email_address(x), parsed_email["delivered_to"])
)
if "attachments" not in parsed_email:
@@ -622,9 +649,7 @@ def parse_email(data, strip_attachment_payloads=False):
payload = str.encode(payload)
attachment["sha256"] = hashlib.sha256(payload).hexdigest()
except Exception as e:
- logger.debug("Unable to decode attachment: {0}".format(
- e.__str__()
- ))
+ logger.debug("Unable to decode attachment: {0}".format(e.__str__()))
if strip_attachment_payloads:
for attachment in parsed_email["attachments"]:
if "payload" in attachment:
@@ -634,7 +659,8 @@ def parse_email(data, strip_attachment_payloads=False):
parsed_email["subject"] = None
parsed_email["filename_safe_subject"] = get_filename_safe_string(
- parsed_email["subject"])
+ parsed_email["subject"]
+ )
if "body" not in parsed_email:
parsed_email["body"] = None
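
A usage sketch of the get_ip_address_info() signature reformatted above. With offline=True the live reverse DNS lookup (and therefore the reverse DNS service map) is skipped, while the bundled IP-to-country database still supplies the country; the IP address is a documentation-range value:

    from parsedmarc.utils import get_ip_address_info

    info = get_ip_address_info(
        "203.0.113.7",  # TEST-NET-3 documentation address
        offline=True,   # no live DNS queries or reverse DNS map download
        timeout=2.0,
    )
    print(info)
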
diff --git a/parsedmarc/webhook.py b/parsedmarc/webhook.py
index 632bf5a7..b54ab650 100644
--- a/parsedmarc/webhook.py
+++ b/parsedmarc/webhook.py
@@ -4,10 +4,9 @@
class WebhookClient(object):
- """ A client for webhooks"""
+ """A client for webhooks"""
- def __init__(self, aggregate_url, forensic_url, smtp_tls_url,
- timeout=60):
+ def __init__(self, aggregate_url, forensic_url, smtp_tls_url, timeout=60):
"""
Initializes the WebhookClient
Args:
@@ -22,8 +21,8 @@ def __init__(self, aggregate_url, forensic_url, smtp_tls_url,
self.timeout = timeout
self.session = requests.Session()
self.session.headers = {
- 'User-Agent': 'parsedmarc',
- 'Content-Type': 'application/json',
+ "User-Agent": "parsedmarc",
+ "Content-Type": "application/json",
}
def save_forensic_report_to_webhook(self, report):
diff --git a/pyproject.toml b/pyproject.toml
index 9a98018f..d6d1d719 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -46,7 +46,7 @@ dependencies = [
"imapclient>=2.1.0",
"kafka-python-ng>=2.2.2",
"lxml>=4.4.0",
- "mailsuite>=1.6.1",
+ "mailsuite>=1.9.17",
"msgraph-core==0.2.2",
"opensearch-py>=2.4.2,<=3.0.0",
"publicsuffixlist>=0.10.0",
diff --git a/requirements.txt b/requirements.txt
index 7483b230..ba292bb6 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -13,7 +13,7 @@ elasticsearch<7.14.0
elasticsearch-dsl>=7.4.0
opensearch-py>=2.4.2,<=3.0.0
kafka-python-ng>=2.2.2
-mailsuite>=1.6.1
+mailsuite>=1.9.17
pygelf
nose>=1.3.7
wheel>=0.37.0
@@ -43,3 +43,4 @@ myst-parser[linkify]
requests
bs4
pytest
+
diff --git a/senders/README.md b/senders/README.md
deleted file mode 100644
index 6559090b..00000000
--- a/senders/README.md
+++ /dev/null
@@ -1,22 +0,0 @@
-# About
-
-A database of information about services that send email.
-
-## Updates
-
-To update the database with the latest information from[ dmarc.io][dmarc.io], start a
-[Splash][splash] Docker container, then run `updatedb.py`.
-
-## Credit
-
-`senders.sqlite` contains information from [dmarc.io][dmarc.io] that is licensed under a
-[Creative Commons Attribution-ShareAlike 4.0 International License][cc].
-
-## License
-
-`senders.sqlite` is licensed under a
-[Creative Commons Attribution-ShareAlike 4.0 International License][cc].
-
-[dmarc.io]: https://dmarc.io/
-[splash]: https://splash.readthedocs.io/en/stable/
-[cc]: https://creativecommons.org/licenses/by-sa/4.0/
\ No newline at end of file
diff --git a/senders/__init__.py b/senders/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/senders/senders.sqlite b/senders/senders.sqlite
deleted file mode 100644
index 43e8e0ce..00000000
Binary files a/senders/senders.sqlite and /dev/null differ
diff --git a/senders/updatedb.py b/senders/updatedb.py
deleted file mode 100644
index 2a8fc525..00000000
--- a/senders/updatedb.py
+++ /dev/null
@@ -1,91 +0,0 @@
-import requests
-
-import sqlite3
-
-from bs4 import BeautifulSoup
-
-
-db = sqlite3.connect("senders.sqlite")
-db.execute("""
-CREATE TABLE IF NOT EXISTS "senders" (
- "id" INTEGER UNIQUE NOT NULL,
- "name" TEXT UNIQUE NOT NULL,
- "spf_aligned" INTEGER,
- "dkim_aligned" INTEGER,
- "known_to_forward" INTEGER,
- "forward_dkim_intact" INTEGER,
- "forward_own_envelope_domain" INTEGER,
- "support_url" TEXT,
- "dmarc_io_uri" TEXT UNIQUE,
- PRIMARY KEY("id" AUTOINCREMENT),
- CHECK("spf_aligned" = 0 or "spf_aligned" = 1),
- CHECK("dkim_aligned" = 0 or "dkim_aligned" = 1),
- CHECK("known_to_forward" = 0 or "known_to_forward" = 1),
- CHECK("forward_dkim_intact" = 0 or "forward_dkim_intact" = 1),
- CHECK(
- "forward_own_envelope_domain" = 0 or "forward_own_envelope_domain" = 1
- )
-)
-""")
-db.execute("""
-CREATE TABLE IF NOT EXISTS "reverse_dns" (
- "id" INTEGER UNIQUE NOT NULL,
- "base_domain" TEXT UNIQUE NOT NULL,
- "sender_id" INTEGER NOT NULL,
- PRIMARY KEY("id" AUTOINCREMENT),
- FOREIGN KEY(sender_id) REFERENCES senders(id)
-)
-""")
-curser = db.cursor()
-content = requests.get("http://localhost:8050/render.html",
- params=dict(url="https://dmarc.io/sources/")).content
-soup = BeautifulSoup(content, "html.parser")
-table = soup.find("tbody")
-rows = table.find_all("tr")
-for row in rows:
- data = row.find_all("td")
- link = data[0].find("a")
- name = link.text
- dmarc_io_uri = link.get("href")
- spf_aligned = len(data[1].find_all("i"))
- dkim_aligned = len(data[2].find_all("i"))
- params = (name, spf_aligned, dkim_aligned, 0,
- dmarc_io_uri)
- curser.execute("""
- UPDATE senders
- SET name = ?,
- spf_aligned = ?,
- dkim_aligned = ?,
- known_to_forward = ?
- WHERE dmarc_io_uri = ?""", params)
- db.commit()
- curser.execute("""
- INSERT OR IGNORE INTO senders(name, spf_aligned, dkim_aligned,
- known_to_forward, dmarc_io_uri) values (?,?,?,?,?)""", params)
- db.commit()
-content = requests.get("http://localhost:8050/render.html",
- params=dict(url="https://dmarc.io/forwarders/")).content
-soup = BeautifulSoup(content, "html.parser")
-table = soup.find("tbody")
-rows = table.find_all("tr")
-for row in rows:
- data = row.find_all("td")
- link = data[0].find("a")
- name = link.text
- dmarc_io_uri = link.get("href")
- forward_dkim_intact = len(data[1].find_all("i"))
- forward_own_envelope_domain = len(data[2].find_all("i"))
- params = (name, forward_dkim_intact, forward_own_envelope_domain, 1,
- dmarc_io_uri)
- curser.execute("""
- UPDATE senders
- SET name = ?,
- forward_dkim_intact = ?,
- forward_own_envelope_domain = ?,
- known_to_forward = ?
- WHERE dmarc_io_uri = ?""", params)
- db.commit()
- curser.execute("""
- INSERT OR IGNORE INTO senders(name, spf_aligned, dkim_aligned,
- known_to_forward, dmarc_io_uri) values (?,?,?,?,?)""", params)
- db.commit()
diff --git a/tests.py b/tests.py
index 464e63a3..5d32b1e3 100644
--- a/tests.py
+++ b/tests.py
@@ -12,14 +12,14 @@
def minify_xml(xml_string):
parser = etree.XMLParser(remove_blank_text=True)
- tree = etree.fromstring(xml_string.encode('utf-8'), parser)
- return etree.tostring(tree, pretty_print=False).decode('utf-8')
+ tree = etree.fromstring(xml_string.encode("utf-8"), parser)
+ return etree.tostring(tree, pretty_print=False).decode("utf-8")
def compare_xml(xml1, xml2):
parser = etree.XMLParser(remove_blank_text=True)
- tree1 = etree.fromstring(xml1.encode('utf-8'), parser)
- tree2 = etree.fromstring(xml2.encode('utf-8'), parser)
+ tree1 = etree.fromstring(xml1.encode("utf-8"), parser)
+ tree2 = etree.fromstring(xml2.encode("utf-8"), parser)
return etree.tostring(tree1) == etree.tostring(tree2)
@@ -46,8 +46,7 @@ def testExtractReportXMLComparator(self):
print()
xmlnice = open("samples/extract_report/nice-input.xml").read()
print(xmlnice)
- xmlchanged = minify_xml(open(
- "samples/extract_report/changed-input.xml").read())
+ xmlchanged = minify_xml(open("samples/extract_report/changed-input.xml").read())
print(xmlchanged)
self.assertTrue(compare_xml(xmlnice, xmlnice))
self.assertTrue(compare_xml(xmlchanged, xmlchanged))
@@ -59,9 +58,9 @@ def testExtractReportBytes(self):
"""Test extract report function for bytes string input"""
print()
file = "samples/extract_report/nice-input.xml"
- with open(file, 'rb') as f:
+ with open(file, "rb") as f:
data = f.read()
- print("Testing {0}: " .format(file), end="")
+ print("Testing {0}: ".format(file), end="")
xmlout = parsedmarc.extract_report(data)
xmlin = open("samples/extract_report/nice-input.xml").read()
self.assertTrue(compare_xml(xmlout, xmlin))
@@ -71,7 +70,7 @@ def testExtractReportXML(self):
"""Test extract report function for XML input"""
print()
file = "samples/extract_report/nice-input.xml"
- print("Testing {0}: " .format(file), end="")
+ print("Testing {0}: ".format(file), end="")
xmlout = parsedmarc.extract_report(file)
xmlin = open("samples/extract_report/nice-input.xml").read()
self.assertTrue(compare_xml(xmlout, xmlin))
@@ -81,7 +80,7 @@ def testExtractReportGZip(self):
"""Test extract report function for gzip input"""
print()
file = "samples/extract_report/nice-input.xml.gz"
- print("Testing {0}: " .format(file), end="")
+ print("Testing {0}: ".format(file), end="")
xmlout = parsedmarc.extract_report_from_file_path(file)
xmlin = open("samples/extract_report/nice-input.xml").read()
self.assertTrue(compare_xml(xmlout, xmlin))
@@ -91,15 +90,13 @@ def testExtractReportZip(self):
"""Test extract report function for zip input"""
print()
file = "samples/extract_report/nice-input.xml.zip"
- print("Testing {0}: " .format(file), end="")
+ print("Testing {0}: ".format(file), end="")
xmlout = parsedmarc.extract_report_from_file_path(file)
print(xmlout)
- xmlin = minify_xml(open(
- "samples/extract_report/nice-input.xml").read())
+ xmlin = minify_xml(open("samples/extract_report/nice-input.xml").read())
print(xmlin)
self.assertTrue(compare_xml(xmlout, xmlin))
- xmlin = minify_xml(open(
- "samples/extract_report/changed-input.xml").read())
+ xmlin = minify_xml(open("samples/extract_report/changed-input.xml").read())
print(xmlin)
self.assertFalse(compare_xml(xmlout, xmlin))
print("Passed!")
@@ -111,16 +108,17 @@ def testAggregateSamples(self):
for sample_path in sample_paths:
if os.path.isdir(sample_path):
continue
- print("Testing {0}: " .format(sample_path), end="")
+ print("Testing {0}: ".format(sample_path), end="")
parsed_report = parsedmarc.parse_report_file(
- sample_path, always_use_local_files=True)["report"]
+ sample_path, always_use_local_files=True
+ )["report"]
parsedmarc.parsed_aggregate_reports_to_csv(parsed_report)
print("Passed!")
def testEmptySample(self):
"""Test empty/unparasable report"""
with self.assertRaises(parsedmarc.ParserError):
- parsedmarc.parse_report_file('samples/empty.xml')
+ parsedmarc.parse_report_file("samples/empty.xml")
def testForensicSamples(self):
"""Test sample forensic/ruf/failure DMARC reports"""
@@ -130,10 +128,8 @@ def testForensicSamples(self):
print("Testing {0}: ".format(sample_path), end="")
with open(sample_path) as sample_file:
sample_content = sample_file.read()
- parsed_report = parsedmarc.parse_report_email(
- sample_content)["report"]
- parsed_report = parsedmarc.parse_report_file(
- sample_path)["report"]
+ parsed_report = parsedmarc.parse_report_email(sample_content)["report"]
+ parsed_report = parsedmarc.parse_report_file(sample_path)["report"]
parsedmarc.parsed_forensic_reports_to_csv(parsed_report)
print("Passed!")
@@ -144,9 +140,8 @@ def testSmtpTlsSamples(self):
for sample_path in sample_paths:
if os.path.isdir(sample_path):
continue
- print("Testing {0}: " .format(sample_path), end="")
- parsed_report = parsedmarc.parse_report_file(
- sample_path)["report"]
+ print("Testing {0}: ".format(sample_path), end="")
+ parsed_report = parsedmarc.parse_report_file(sample_path)["report"]
parsedmarc.parsed_smtp_tls_reports_to_csv(parsed_report)
print("Passed!")
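
The tests above lean on two small lxml helpers, minify_xml() and compare_xml(), so that differently formatted copies of the same report compare equal. A self-contained sketch reproducing them against a hypothetical sample document:

    from lxml import etree


    def minify_xml(xml_string):
        # Drop whitespace-only text nodes, then serialize compactly.
        parser = etree.XMLParser(remove_blank_text=True)
        tree = etree.fromstring(xml_string.encode("utf-8"), parser)
        return etree.tostring(tree, pretty_print=False).decode("utf-8")


    def compare_xml(xml1, xml2):
        # Equal canonical serializations mean the documents are equivalent.
        parser = etree.XMLParser(remove_blank_text=True)
        tree1 = etree.fromstring(xml1.encode("utf-8"), parser)
        tree2 = etree.fromstring(xml2.encode("utf-8"), parser)
        return etree.tostring(tree1) == etree.tostring(tree2)


    pretty = "<report>\n  <record>\n    <count>1</count>\n  </record>\n</report>"
    print(compare_xml(pretty, minify_xml(pretty)))  # True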