-
Notifications
You must be signed in to change notification settings - Fork 387
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
To support Lambda customized log group #709
Changes from all commits
0c21250
3e6cb3f
4d01e9d
02fb936
70bc635
90478ff
0407528
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
import re | ||
|
||
""" | ||
Customized log group is a log group shared by multiple applications of the same type. Based on the feedback from AWS, | ||
customers may name the log group arbitrarily. E.g they can name a lambda log group as "/aws/vendedlogs/states/**", which is typically used for Stepfunctions | ||
In addition, potentially, not just Lambda, any other AWS services can use a customized log group. | ||
The workaround is to parse the logstream_name to get the source of logs. | ||
""" | ||
|
||
# Example: "2023/11/06/test-customized-log-group1[$LATEST]13e304cba4b9446eb7ef082a00038990"
# Lambda keeps its <date>/<function name>[<version>]<hex suffix> log stream
# naming scheme even when writing into a customized (shared) log group, so the
# stream name is the reliable signal that a log event came from Lambda.
# Raw string: "/" needs no escaping in a regex, and raw literals avoid
# double-escaping the metacharacters ("\[", "\$", ...).
REX_LAMBDA_CUSTOMIZE_LOGSTREAM_NAME_PATTERN = re.compile(
    r"^[0-9]{4}/[01][0-9]/[0-3][0-9]/[0-9a-zA-Z_.-]{1,75}"
    r"\[(?:\$LATEST|[0-9A-Za-z_-]{1,129})\][0-9a-f]{32}$"
)


def is_lambda_customized_log_group(logstream_name):
    """Return True if `logstream_name` matches Lambda's log stream naming
    scheme, i.e. the event came from a Lambda function writing into a
    customized log group."""
    return (
        REX_LAMBDA_CUSTOMIZE_LOGSTREAM_NAME_PATTERN.fullmatch(logstream_name)
        is not None
    )
|
||
|
||
def get_lambda_function_name_from_logstream_name(logstream_name):
    """Extract the Lambda function name embedded in a customized-log-group
    log stream name.

    Example: "2023/11/06/my-func[$LATEST]<32 hex chars>" -> "my-func"

    Returns None when the stream name does not match the customized Lambda
    naming scheme.
    """
    # Streams that do not match the customized Lambda pattern carry no
    # recoverable function name.
    if not is_lambda_customized_log_group(logstream_name):
        return None
    try:
        # The function name sits between the last "/" of the date prefix
        # and the "[" that opens the version qualifier.
        left_bracket_pos = logstream_name.index("[")
        last_slash_pos = logstream_name.rindex("/")
        return logstream_name[last_slash_pos + 1 : left_bracket_pos]
    except ValueError:
        # Defensive: a stream that matched the pattern always contains both
        # "/" and "[", but fail soft rather than crash the forwarder.
        # (Previously a bare `except:` that swallowed every exception.)
        return None
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,6 +19,10 @@ | |
|
||
from datadog_lambda.metric import lambda_stats | ||
|
||
from customized_log_group import ( | ||
get_lambda_function_name_from_logstream_name, | ||
is_lambda_customized_log_group, | ||
) | ||
from step_functions_cache import StepFunctionsTagsCache | ||
from cloudwatch_log_group_cache import CloudwatchLogGroupTagsCache | ||
from telemetry import ( | ||
|
@@ -495,6 +499,12 @@ def awslogs_handler(event, context, metadata): | |
source = "transitgateway" | ||
metadata[DD_SOURCE] = parse_event_source(event, source) | ||
|
||
# Special handling for customized log group of Lambda functions | ||
# Multiple Lambda functions can share one single customized log group | ||
# Need to parse logStream name to determine whether it is a Lambda function | ||
if is_lambda_customized_log_group(logs["logStream"]): | ||
metadata[DD_SOURCE] = "lambda" | ||
|
||
# Build aws attributes | ||
aws_attributes = { | ||
"aws": { | ||
|
@@ -570,28 +580,8 @@ def awslogs_handler(event, context, metadata): | |
|
||
# For Lambda logs we want to extract the function name, | ||
# then rebuild the arn of the monitored lambda using that name. | ||
# Start by splitting the log group to get the function name | ||
if metadata[DD_SOURCE] == "lambda": | ||
log_group_parts = logs["logGroup"].split("/lambda/") | ||
if len(log_group_parts) > 1: | ||
lowercase_function_name = log_group_parts[1].lower() | ||
# Split the arn of the forwarder to extract the prefix | ||
arn_parts = context.invoked_function_arn.split("function:") | ||
if len(arn_parts) > 0: | ||
arn_prefix = arn_parts[0] | ||
# Rebuild the arn with the lowercased function name | ||
lowercase_arn = arn_prefix + "function:" + lowercase_function_name | ||
# Add the lowercased arn as a log attribute | ||
arn_attributes = {"lambda": {"arn": lowercase_arn}} | ||
aws_attributes = merge_dicts(aws_attributes, arn_attributes) | ||
|
||
env_tag_exists = ( | ||
metadata[DD_CUSTOM_TAGS].startswith("env:") | ||
or ",env:" in metadata[DD_CUSTOM_TAGS] | ||
) | ||
# If there is no env specified, default to env:none | ||
if not env_tag_exists: | ||
metadata[DD_CUSTOM_TAGS] += ",env:none" | ||
process_lambda_logs(logs, aws_attributes, context, metadata) | ||
|
||
# The EKS log group contains various sources from the K8S control plane. | ||
# In order to have these automatically trigger the correct pipelines they | ||
|
@@ -863,3 +853,42 @@ def get_state_machine_arn(message): | |
arn_tokens[5] = "stateMachine" | ||
return ":".join(arn_tokens[:7]) | ||
return "" | ||
|
||
|
||
# Lambda logs can come from either the default or a customized log group.
def process_lambda_logs(logs, aws_attributes, context, metadata):
    """Enrich metadata for Lambda logs: rebuild the monitored function's
    (lowercased) ARN into `aws_attributes` and default the env tag to
    env:none when no env tag is present."""
    function_name = get_lower_cased_lambda_function_name(logs)
    if function_name is None:
        return
    # The forwarder's own ARN supplies the partition/account/region prefix;
    # splice the monitored function's lowercased name onto it.
    arn_prefix = context.invoked_function_arn.split("function:")[0]
    rebuilt_arn = arn_prefix + "function:" + function_name
    # Attach the lowercased ARN as a log attribute.
    aws_attributes = merge_dicts(aws_attributes, {"lambda": {"arn": rebuilt_arn}})
    # If no env is specified anywhere in the custom tags, default to env:none.
    tags = metadata[DD_CUSTOM_TAGS]
    if not (tags.startswith("env:") or ",env:" in tags):
        metadata[DD_CUSTOM_TAGS] += ",env:none"
|
||
|
||
# The Lambda function name can be inferred from either a customized log
# stream name or the default "/aws/lambda/<name>" log group name.
def get_lower_cased_lambda_function_name(logs):
    """Return the lowercased Lambda function name for this log batch, or
    None when neither the log stream nor the log group reveals one."""
    # Prefer the name parsed from the log stream: it covers customized log
    # groups, whose group name says nothing about the function, and handles
    # some edge cases of the default layout as well.
    name = get_lambda_function_name_from_logstream_name(logs["logStream"])
    if name is not None:
        return name.lower()
    # Fall back to the default log group layout "/aws/lambda/<name>".
    group_parts = logs["logGroup"].split("/lambda/")
    if len(group_parts) > 1:
        return group_parts[1].lower()
    return None
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
import unittest | ||
from customized_log_group import ( | ||
is_lambda_customized_log_group, | ||
get_lambda_function_name_from_logstream_name, | ||
) | ||
|
||
|
||
class TestCustomizedLogGroup(unittest.TestCase):
    """Unit tests for customized Lambda log group detection and parsing."""

    def test_is_lambda_customized_log_group(self):
        # Default log group stream for Lambda: no function name before "[".
        default_log_stream_name = "2023/11/04/[$LATEST]4426346c2cdf4c54a74d3bd2b929fc44"
        self.assertFalse(is_lambda_customized_log_group(default_log_stream_name))
        # Customized log group, $LATEST version qualifier.
        customized_log_stream_name_latest = "2023/11/06/test-customized-log-group1[$LATEST]13e304cba4b9446eb7ef082a00038990"
        self.assertTrue(
            is_lambda_customized_log_group(customized_log_stream_name_latest)
        )
        # Customized log group, named version qualifier.
        customized_log_stream_name_version = "2023/11/06/test-customized-log-group1[version2023_11]13e304cba4b9446eb7ef082a00038990"
        self.assertTrue(
            is_lambda_customized_log_group(customized_log_stream_name_version)
        )
        # Step Functions log stream: a different naming scheme entirely.
        stepfunction_log_stream_name = (
            "states/rc-auto-statemachine-staging/2023-11-14-20-05/507c6089"
        )
        self.assertFalse(
            is_lambda_customized_log_group(stepfunction_log_stream_name)
        )

    # BUG FIX: this method previously lacked the "test_" prefix, so unittest
    # discovery never collected or ran it.
    def test_get_lambda_function_name_from_logstream_name(self):
        # Default log group for Lambda: no function name to extract.
        default_log_stream_name = "2023/11/04/[$LATEST]4426346c2cdf4c54a74d3bd2b929fc44"
        self.assertIsNone(
            get_lambda_function_name_from_logstream_name(default_log_stream_name)
        )
        # Customized log group, $LATEST version qualifier.
        customized_log_stream_name_latest = "2023/11/06/test-customized-log-group1[$LATEST]13e304cba4b9446eb7ef082a00038990"
        self.assertEqual(
            get_lambda_function_name_from_logstream_name(
                customized_log_stream_name_latest
            ),
            "test-customized-log-group1",
        )
        # Customized log group, named version qualifier.
        customized_log_stream_name_version = "2023/11/06/test-customized-log-group1[version2023_11]13e304cba4b9446eb7ef082a00038990"
        self.assertEqual(
            get_lambda_function_name_from_logstream_name(
                customized_log_stream_name_version
            ),
            "test-customized-log-group1",
        )
        # Step Functions log stream: not a Lambda stream, so no name.
        stepfunction_log_stream_name = (
            "states/rc-auto-statemachine-staging/2023-11-14-20-05/507c6089"
        )
        self.assertIsNone(
            get_lambda_function_name_from_logstream_name(stepfunction_log_stream_name)
        )
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
{ | ||
"messageType": "DATA_MESSAGE", | ||
"owner": "601427279990", | ||
"logGroup": "/aws/vendedlogs/states/anyLogGroupName", | ||
"logStream": "2020/03/05/test-customized-loggroup[$LATEST]20bddfd5a2dc4c6b97ac02800eae90d0", | ||
"subscriptionFilters": ["some-filter-name"], | ||
"logEvents": [ | ||
{ | ||
"id": "35311576111948622874033876462979853992919938886093242368", | ||
"timestamp": 1583425836114, | ||
"message": "2020-03-05T16:30:36.113Z\tf08bb4c8-d6b2-4f05-ac17-af7e2ba005fb\tDEBUG\t[dd.trace_id=3172564172058669914 dd.span_id=14292093692483532556] {\"status\":\"debug\",\"message\":\"datadog:Patched console output with trace context\"}\n" | ||
}, | ||
{ | ||
"id": "35311576111948622874033876462979853992919938886093242369", | ||
"timestamp": 1583425836114, | ||
"message":"2020-03-05T16:30:36.114Z\tf08bb4c8-d6b2-4f05-ac17-af7e2ba005fb\tDEBUG\t[dd.trace_id=3172564172058669914 dd.span_id=14292093692483532556] {\"autoPatchHTTP\":true,\"tracerInitialized\":true,\"status\":\"debug\",\"message\":\"datadog:Not patching HTTP libraries\"}\n" | ||
}, | ||
{ | ||
"id": "35311576111948622874033876462979853992919938886093242370", | ||
"timestamp": 1583425836114, | ||
"message": "2020-03-05T16:30:36.114Z\tf08bb4c8-d6b2-4f05-ac17-af7e2ba005fb\tDEBUG\t[dd.trace_id=3172564172058669914 dd.span_id=14292093692483532556] {\"status\":\"debug\",\"message\":\"datadog:Reading trace context from env var Root=1-5e61292c-cc1229a4dfbeae1043928548;Parent=c657b77d9514f70c;Sampled=1\"}\n" | ||
} | ||
] | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It might not be a concern, but I worry about running a regular expression match on every Lambda log event.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
On second thought, it might be the best way...
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The format for logstream names varies on the source of the logs. I originally expect it to be something more consistent, but it is not.