Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

To support Lambda customized log group #709

Merged
merged 7 commits into from
Nov 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions aws/logs_monitoring/customized_log_group.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import re

"""
A customized log group is a log group shared by multiple applications of the
same type. Based on feedback from AWS, customers may name a log group
arbitrarily. E.g. they can name a Lambda log group "/aws/vendedlogs/states/**",
which is typically used for Step Functions. In addition, potentially any AWS
service (not just Lambda) can use a customized log group. The workaround is to
parse the logstream_name to determine the source of the logs.
"""

# Example: "2023/11/06/test-customized-log-group1[$LATEST]13e304cba4b9446eb7ef082a00038990"
# Layout: YYYY/MM/DD/<function name>[<version or $LATEST>]<32 hex chars>
# Raw string avoids double-escaping; `^`/`$` anchors are dropped because the
# pattern is only used with fullmatch(), which anchors implicitly.
REX_LAMBDA_CUSTOMIZE_LOGSTREAM_NAME_PATTERN = re.compile(
    r"[0-9]{4}/[01][0-9]/[0-3][0-9]/[0-9a-zA-Z_.-]{1,75}\[(?:\$LATEST|[0-9A-Za-z_-]{1,129})\][0-9a-f]{32}"
)


def is_lambda_customized_log_group(logstream_name):
    """Return True when logstream_name matches the Lambda log stream layout.

    Note that a default Lambda log stream ("2023/11/04/[$LATEST]...") does NOT
    match: the pattern requires at least one function-name character before "[".
    """
    return (
        REX_LAMBDA_CUSTOMIZE_LOGSTREAM_NAME_PATTERN.fullmatch(logstream_name)
        is not None
    )


def get_lambda_function_name_from_logstream_name(logstream_name):
    """Extract the Lambda function name from a customized log stream name.

    Returns None when logstream_name does not match the customized Lambda
    log stream pattern.
    """
    if not is_lambda_customized_log_group(logstream_name):
        return None
    try:
        # The function name sits between the last "/" and the first "[".
        left_square_bracket_pos = logstream_name.index("[")
        last_forward_slash_pos = logstream_name.rindex("/")
        return logstream_name[last_forward_slash_pos + 1 : left_square_bracket_pos]
    except ValueError:
        # Defensive only: the regex guarantees both "[" and "/" are present.
        # Narrowed from a bare `except:` that would also swallow
        # KeyboardInterrupt/SystemExit.
        return None
71 changes: 50 additions & 21 deletions aws/logs_monitoring/parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@

from datadog_lambda.metric import lambda_stats

from customized_log_group import (
get_lambda_function_name_from_logstream_name,
is_lambda_customized_log_group,
)
from step_functions_cache import StepFunctionsTagsCache
from cloudwatch_log_group_cache import CloudwatchLogGroupTagsCache
from telemetry import (
Expand Down Expand Up @@ -495,6 +499,12 @@ def awslogs_handler(event, context, metadata):
source = "transitgateway"
metadata[DD_SOURCE] = parse_event_source(event, source)

# Special handling for customized log group of Lambda functions
# Multiple Lambda functions can share one single customized log group
# Need to parse logStream name to determine whether it is a Lambda function
if is_lambda_customized_log_group(logs["logStream"]):
metadata[DD_SOURCE] = "lambda"

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might not be a concern, but I worry about running a regular expression match on every Lambda log event.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On second thought, it might be the best way...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The format for logstream names varies on the source of the logs. I originally expect it to be something more consistent, but it is not.

# Build aws attributes
aws_attributes = {
"aws": {
Expand Down Expand Up @@ -570,28 +580,8 @@ def awslogs_handler(event, context, metadata):

# For Lambda logs we want to extract the function name,
# then rebuild the arn of the monitored lambda using that name.
# Start by splitting the log group to get the function name
if metadata[DD_SOURCE] == "lambda":
log_group_parts = logs["logGroup"].split("/lambda/")
if len(log_group_parts) > 1:
lowercase_function_name = log_group_parts[1].lower()
# Split the arn of the forwarder to extract the prefix
arn_parts = context.invoked_function_arn.split("function:")
if len(arn_parts) > 0:
arn_prefix = arn_parts[0]
# Rebuild the arn with the lowercased function name
lowercase_arn = arn_prefix + "function:" + lowercase_function_name
# Add the lowercased arn as a log attribute
arn_attributes = {"lambda": {"arn": lowercase_arn}}
aws_attributes = merge_dicts(aws_attributes, arn_attributes)

env_tag_exists = (
metadata[DD_CUSTOM_TAGS].startswith("env:")
or ",env:" in metadata[DD_CUSTOM_TAGS]
)
# If there is no env specified, default to env:none
if not env_tag_exists:
metadata[DD_CUSTOM_TAGS] += ",env:none"
process_lambda_logs(logs, aws_attributes, context, metadata)

# The EKS log group contains various sources from the K8S control plane.
# In order to have these automatically trigger the correct pipelines they
Expand Down Expand Up @@ -863,3 +853,42 @@ def get_state_machine_arn(message):
arn_tokens[5] = "stateMachine"
return ":".join(arn_tokens[:7])
return ""


# Lambda logs can come from either a default or a customized log group
def process_lambda_logs(logs, aws_attributes, context, metadata):
    """Enrich metadata/attributes for a batch of Lambda logs.

    Attaches the lowercased function ARN (rebuilt from the forwarder's own
    ARN prefix plus the function name inferred from the log batch) and
    defaults the env tag to "env:none" when no env tag is present.

    Mutates metadata in place. Returns None.
    """
    lower_cased_function_name = get_lower_cased_lambda_function_name(logs)
    if lower_cased_function_name is None:
        # Not identifiable as a Lambda function; nothing to enrich.
        return
    # Split the ARN of the forwarder to extract the account/region prefix
    arn_parts = context.invoked_function_arn.split("function:")
    if len(arn_parts) > 0:  # str.split always yields >= 1 part; kept defensively
        arn_prefix = arn_parts[0]
        # Rebuild the ARN with the lowercased function name
        lower_cased_arn = arn_prefix + "function:" + lower_cased_function_name
        # Add the lowercased ARN as a log attribute.
        # NOTE(review): the rebinding of `aws_attributes` is local — the caller
        # only sees this merge if merge_dicts mutates its first argument.
        # Verify merge_dicts' semantics.
        arn_attributes = {"lambda": {"arn": lower_cased_arn}}
        aws_attributes = merge_dicts(aws_attributes, arn_attributes)
    env_tag_exists = (
        metadata[DD_CUSTOM_TAGS].startswith("env:")
        or ",env:" in metadata[DD_CUSTOM_TAGS]
    )
    # If there is no env specified, default to env:none
    if not env_tag_exists:
        metadata[DD_CUSTOM_TAGS] += ",env:none"


# The Lambda function name can be inferred from either a customized logstream
# name or a log group name
def get_lower_cased_lambda_function_name(logs):
    """Return the lowercased Lambda function name for this log batch.

    Returns None when neither the log stream nor the log group reveals a
    function name.
    """
    # A name parsed from the log stream is preferred, to handle customized
    # log groups whose group name carries no function name.
    function_name = get_lambda_function_name_from_logstream_name(logs["logStream"])
    if function_name is not None:
        return function_name.lower()
    # Fall back to the default "/aws/lambda/<function name>" group layout.
    group_parts = logs["logGroup"].split("/lambda/")
    if len(group_parts) <= 1:
        return None
    return group_parts[1].lower()
60 changes: 60 additions & 0 deletions aws/logs_monitoring/tests/test_customized_log_group.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
import unittest
from customized_log_group import (
is_lambda_customized_log_group,
get_lambda_function_name_from_logstream_name,
)


class TestCustomizedLogGroup(unittest.TestCase):
    """Unit tests for the customized_log_group helpers."""

    def test_is_lambda_customized_log_group(self):
        # default log group for lambda: no function name before "[", no match
        default_log_stream_name = "2023/11/04/[$LATEST]4426346c2cdf4c54a74d3bd2b929fc44"
        self.assertEqual(is_lambda_customized_log_group(default_log_stream_name), False)
        # customized log group for lambda LATEST
        customized_log_stream_name_latest = "2023/11/06/test-customized-log-group1[$LATEST]13e304cba4b9446eb7ef082a00038990"
        self.assertEqual(
            is_lambda_customized_log_group(customized_log_stream_name_latest), True
        )
        # customized log group for lambda with a named version
        customized_log_stream_name_version = "2023/11/06/test-customized-log-group1[version2023_11]13e304cba4b9446eb7ef082a00038990"
        self.assertEqual(
            is_lambda_customized_log_group(customized_log_stream_name_version), True
        )
        # stepfunction log stream: different layout, must not match
        stepfunction_log_stream_name = (
            "states/rc-auto-statemachine-staging/2023-11-14-20-05/507c6089"
        )
        self.assertEqual(
            is_lambda_customized_log_group(stepfunction_log_stream_name), False
        )

    # BUG FIX: this method was missing the "test_" prefix, so unittest
    # discovery never executed it.
    def test_get_lambda_function_name_from_logstream_name(self):
        # default log group for lambda: no function name to extract
        default_log_stream_name = "2023/11/04/[$LATEST]4426346c2cdf4c54a74d3bd2b929fc44"
        self.assertEqual(
            get_lambda_function_name_from_logstream_name(default_log_stream_name), None
        )
        # customized log group for lambda LATEST
        customized_log_stream_name_latest = "2023/11/06/test-customized-log-group1[$LATEST]13e304cba4b9446eb7ef082a00038990"
        self.assertEqual(
            get_lambda_function_name_from_logstream_name(
                customized_log_stream_name_latest
            ),
            "test-customized-log-group1",
        )
        # customized log group for lambda with a named version
        customized_log_stream_name_version = "2023/11/06/test-customized-log-group1[version2023_11]13e304cba4b9446eb7ef082a00038990"
        self.assertEqual(
            get_lambda_function_name_from_logstream_name(
                customized_log_stream_name_version
            ),
            "test-customized-log-group1",
        )
        # stepfunction log stream: not a Lambda stream
        stepfunction_log_stream_name = (
            "states/rc-auto-statemachine-staging/2023-11-14-20-05/507c6089"
        )
        self.assertEqual(
            get_lambda_function_name_from_logstream_name(stepfunction_log_stream_name),
            None,
        )
36 changes: 36 additions & 0 deletions aws/logs_monitoring/tests/test_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
parse_aws_waf_logs,
get_service_from_tags,
get_state_machine_arn,
get_lower_cased_lambda_function_name,
)
from settings import (
DD_CUSTOM_TAGS,
Expand Down Expand Up @@ -997,5 +998,40 @@ def test_get_state_machine_arn(self):
)


class TestLambdaCustomizedLogGroup(unittest.TestCase):
    """Tests for function-name inference across default, customized and
    non-Lambda log groups."""

    def test_get_lower_cased_lambda_function_name(self):
        # Removed a no-op `self.assertEqual(True, True)` that asserted nothing.
        # Non-Lambda log: neither the stream nor the group yields a name
        stepfunction_loggroup = {
            "messageType": "DATA_MESSAGE",
            "logGroup": "/aws/vendedlogs/states/logs-to-traces-sequential-Logs",
            "logStream": "states/logs-to-traces-sequential/2022-11-10-15-50/7851b2d9",
            "logEvents": [],
        }
        self.assertEqual(
            get_lower_cased_lambda_function_name(stepfunction_loggroup), None
        )
        # Default Lambda log group: name comes from the "/aws/lambda/" group
        lambda_default_loggroup = {
            "messageType": "DATA_MESSAGE",
            "logGroup": "/aws/lambda/test-lambda-default-log-group",
            "logStream": "2023/11/06/[$LATEST]b25b1f977b3e416faa45a00f427e7acb",
            "logEvents": [],
        }
        self.assertEqual(
            get_lower_cased_lambda_function_name(lambda_default_loggroup),
            "test-lambda-default-log-group",
        )
        # Customized Lambda log group: name comes from the log stream
        lambda_customized_loggroup = {
            "messageType": "DATA_MESSAGE",
            "logGroup": "customizeLambdaGrop",
            "logStream": "2023/11/06/test-customized-log-group1[$LATEST]13e304cba4b9446eb7ef082a00038990",
            "logEvents": [],
        }
        self.assertEqual(
            get_lower_cased_lambda_function_name(lambda_customized_loggroup),
            "test-customized-log-group1",
        )


if __name__ == "__main__":
    unittest.main()
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{
"messageType": "DATA_MESSAGE",
"owner": "601427279990",
"logGroup": "/aws/vendedlogs/states/anyLogGroupName",
"logStream": "2020/03/05/test-customized-loggroup[$LATEST]20bddfd5a2dc4c6b97ac02800eae90d0",
"subscriptionFilters": ["some-filter-name"],
"logEvents": [
{
"id": "35311576111948622874033876462979853992919938886093242368",
"timestamp": 1583425836114,
"message": "2020-03-05T16:30:36.113Z\tf08bb4c8-d6b2-4f05-ac17-af7e2ba005fb\tDEBUG\t[dd.trace_id=3172564172058669914 dd.span_id=14292093692483532556] {\"status\":\"debug\",\"message\":\"datadog:Patched console output with trace context\"}\n"
},
{
"id": "35311576111948622874033876462979853992919938886093242369",
"timestamp": 1583425836114,
"message":"2020-03-05T16:30:36.114Z\tf08bb4c8-d6b2-4f05-ac17-af7e2ba005fb\tDEBUG\t[dd.trace_id=3172564172058669914 dd.span_id=14292093692483532556] {\"autoPatchHTTP\":true,\"tracerInitialized\":true,\"status\":\"debug\",\"message\":\"datadog:Not patching HTTP libraries\"}\n"
},
{
"id": "35311576111948622874033876462979853992919938886093242370",
"timestamp": 1583425836114,
"message": "2020-03-05T16:30:36.114Z\tf08bb4c8-d6b2-4f05-ac17-af7e2ba005fb\tDEBUG\t[dd.trace_id=3172564172058669914 dd.span_id=14292093692483532556] {\"status\":\"debug\",\"message\":\"datadog:Reading trace context from env var Root=1-5e61292c-cc1229a4dfbeae1043928548;Parent=c657b77d9514f70c;Sampled=1\"}\n"
}
]
}
Loading