Skip to content

Commit

Permalink
fix matching multiline regex in s3 handler
Browse files Browse the repository at this point in the history
  • Loading branch information
ge0Aja committed Sep 18, 2024
1 parent 549f749 commit e72fc0d
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 2 deletions.
9 changes: 7 additions & 2 deletions aws/logs_monitoring/steps/handlers/s3_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,11 @@ def __init__(self, context, metadata, cache_layer):
if DD_MULTILINE_LOG_REGEX_PATTERN
else None
)
self.multiline_regex_pattern = (
re.compile("[\n\r\f]+(?={})".format(DD_MULTILINE_LOG_REGEX_PATTERN))
if DD_MULTILINE_LOG_REGEX_PATTERN
else None
)
# a private data store for event attributes
self.data_store = S3EventDataStore()

Expand Down Expand Up @@ -283,12 +288,12 @@ def _extract_cloudtrail_logs(self):
def _extract_other_logs(self):
# Check if using multiline log regex pattern
# and determine whether line or pattern separated logs
if self.multiline_regex_start_pattern:
if self.multiline_regex_start_pattern and self.multiline_regex_pattern:
# We'll do string manipulation, so decode bytes into utf-8 first
self.data_store.data = self.data_store.data.decode("utf-8", errors="ignore")

if self.multiline_regex_start_pattern.match(self.data_store.data):
self.data_store.data = self.multiline_regex_start_pattern.split(
self.data_store.data = self.multiline_regex_pattern.split(
self.data_store.data
)
else:
Expand Down
39 changes: 39 additions & 0 deletions aws/logs_monitoring/tests/test_s3_handler.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import gzip
import unittest
import re
from unittest.mock import MagicMock, patch
from importlib import reload

from approvaltests.combination_approvals import verify_all_combinations
from caching.cache_layer import CacheLayer
Expand Down Expand Up @@ -108,6 +110,43 @@ def test_s3_handler(self):
self.assertEqual(self.s3_handler.metadata["ddsource"], "s3")
self.assertEqual(self.s3_handler.metadata["host"], "arn:aws:s3:::my-bucket")

def test_s3_handler_with_multiline_regex(self):
    """Verify that pattern-delimited S3 data is split into multiline events.

    The payload holds three entries that each begin with a ``YYYY-MM-DD``
    date; the first entry continues onto a second line that does not start
    with a date. The handler is expected to yield one structured event per
    dated entry, keeping the continuation line inside the first message.
    """
    event = {
        "Records": [
            {
                "s3": {
                    "bucket": {"name": "my-bucket"},
                    "object": {"key": "my-key"},
                }
            }
        ]
    }
    data = "2022-02-08aaa\nbbbccc\n2022-02-09bbb\n2022-02-10ccc\n"
    self.s3_handler.data_store.data = data.encode("utf-8")
    # Raw strings keep regex escapes such as \d out of Python's own
    # string-escape processing (a non-raw "\d" is an invalid escape sequence).
    self.s3_handler.multiline_regex_start_pattern = re.compile(r"^\d{4}-\d{2}-\d{2}")
    # Separator pattern: a run of line breaks that is immediately followed
    # by a date, so line breaks inside an entry are not split points.
    self.s3_handler.multiline_regex_pattern = re.compile(
        r"[\n\r\f]+(?=\d{4}-\d{2}-\d{2})"
    )
    # Stub out data extraction so the payload assigned above is what gets
    # processed (presumably the real method would fetch the object from S3
    # -- confirm against the handler).
    self.s3_handler._extract_data = MagicMock()
    structured_lines = list(self.s3_handler.handle(event))
    self.assertEqual(
        structured_lines,
        [
            {
                "aws": {"s3": {"bucket": "my-bucket", "key": "my-key"}},
                "message": "2022-02-08aaa\nbbbccc",
            },
            {
                "aws": {"s3": {"bucket": "my-bucket", "key": "my-key"}},
                "message": "2022-02-09bbb",
            },
            {
                # No dated entry follows, so the trailing newline is not a
                # split point and stays in the last message.
                "aws": {"s3": {"bucket": "my-bucket", "key": "my-key"}},
                "message": "2022-02-10ccc\n",
            },
        ],
    )

def test_s3_handler_with_sns(self):
event = {
"Records": [
Expand Down

0 comments on commit e72fc0d

Please sign in to comment.