-
Notifications
You must be signed in to change notification settings - Fork 20
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Replace the multi-level approach with a tagging approach
- Loading branch information
Showing
3 changed files
with
55 additions
and
67 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,62 +1,58 @@ | ||
from traitlets import HasTraits, validate, Set | ||
from pythonjsonlogger import jsonlogger | ||
|
||
|
||
EVENT_MAP = { | ||
'unrestricted': 0, | ||
'user-identifier': 10, | ||
'user-identifiable-information': 20 | ||
} | ||
|
||
EVENT_LEVELS = list(EVENT_MAP.keys()) | ||
|
||
|
||
class JsonEventFormatter(jsonlogger.JsonFormatter): | ||
"""Patch the jsonlogger formatter to include levels for telemetry. | ||
Properties in a logged event that has a level less than | ||
the handler's event_level will be dropped from the emitted event. | ||
""" | ||
|
||
def __init__(self, logger, handler, *args, **kwargs): | ||
self.logger = logger | ||
self.handler = handler | ||
# Set the event logging level | ||
self.event_level = getattr(handler, 'event_level', None) | ||
super(JsonEventFormatter, self).__init__(*args, **kwargs) | ||
|
||
# Protect the event_level attribute. | ||
@property | ||
def event_level(self): | ||
"""Event Log security level.""" | ||
return self._event_level | ||
def allowed_tags(self): | ||
return getattr(self.handler, 'allowed_tags', {}) | ||
|
||
@event_level.setter | ||
def event_level(self, event_level): | ||
# Check that the event level makes sense. | ||
if event_level not in EVENT_LEVELS: | ||
raise Exception("Event level '{}' not understood.".format(event_level)) | ||
self._event_level = self.handler.event_level | ||
@property | ||
def hashed_tags(self): | ||
return getattr(self.handler, 'hashed_tags', {}) | ||
|
||
def process_log_record(self, log_record): | ||
log_record = super(JsonEventFormatter, self).process_log_record(log_record) | ||
return self.process_event_levels(log_record) | ||
return self.process_tags(log_record) | ||
|
||
def process_event_levels(self, log_record): | ||
"""Removes any properties in a log_record that have an attribute `pii = True`. | ||
def drop_property(self, key, record): | ||
del record[key] | ||
return record | ||
|
||
def hash_property(self, key, record): | ||
hash_function = lambda x: x | ||
record[key] = hash_function(record[key]) | ||
return record | ||
|
||
def process_tags(self, log_record): | ||
""" | ||
# Get schema for this log_record | ||
key = (log_record['__schema__'], log_record['__version__']) | ||
""" | ||
# Registered schemas are identified by their name and version. | ||
key = (log_record['__schema__'], log_record['__schema_version__']) | ||
schema = self.logger.schemas[key]['properties'] | ||
|
||
# Find all properties that have a level less than the handler. | ||
keys = list(log_record.keys()) | ||
for key in keys: | ||
# Ignore any keys that start with __ | ||
if not key.startswith('__'): | ||
# Check if security level is listed in the schema. | ||
if EVENT_MAP[schema[key]['level']] > EVENT_MAP[self.event_level]: | ||
# If property's level is less than handler's level, | ||
# delete this property from the log record. | ||
del log_record[key] | ||
props = [key for key in log_record.keys() | ||
if not key.startswith('__') and key != 'message'] | ||
|
||
# Walk through the recorded event and handle each key | ||
# based on its tag/category. | ||
for key in props: | ||
tag = schema[key]['tag'] | ||
if tag in self.allowed_tags or tag == "unrestricted": | ||
# If the tag is found in the allowed_tags trait, do nothing. | ||
if tag in self.hashed_tags: | ||
log_record = self.hash_property(key, log_record) | ||
# Drop tags not listed in allowed_tags | ||
else: | ||
log_record = self.drop_property(key, log_record) | ||
|
||
return log_record |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,39 +24,39 @@ def schema(schema_id, version): | |
'description': 'Test Event.', | ||
'type': 'object', | ||
'properties': { | ||
'nothing-exciting': { | ||
'description': 'a property with nothing exciting happening', | ||
'tag': 'unrestricted', | ||
'type': 'string' | ||
}, | ||
'id': { | ||
'description': 'user ID', | ||
'level': 'confidential', | ||
'description': 'user ID', | ||
'tag': 'user-identifier', | ||
'type': 'string' | ||
}, | ||
'email': { | ||
'description': 'email address', | ||
'level': 'secret', | ||
'tag': 'user-identifiable-information', | ||
'type': 'string' | ||
}, | ||
'name': { | ||
'description': 'name of user', | ||
'level': 'top_secret', | ||
'type': 'string' | ||
} | ||
} | ||
} | ||
|
||
|
||
@pytest.mark.parametrize( | ||
'level,expected_props', | ||
'tags,expected_props', | ||
[ | ||
('unclassified', set()), | ||
('confidential', {'id'}), | ||
('secret', {'id', 'email'}), | ||
('top_secret', {'id', 'email', 'name'}) | ||
({'unrestricted'}, {'nothing-exciting'}), | ||
({'user-identifier'}, {'nothing-exciting', 'id'}), | ||
({'user-identifiable-information'}, {'nothing-exciting', 'email'}) | ||
] | ||
) | ||
def test_drop_sensitive_properties(schema, schema_id, version, level, expected_props): | ||
def test_properties_tags(schema, schema_id, version, tags, expected_props): | ||
sink = io.StringIO() | ||
|
||
# Create a handler that captures+records events with allowed tags. | ||
handler = logging.StreamHandler(sink) | ||
# Set the event level | ||
handler.event_level = level | ||
handler.allowed_tags = tags | ||
|
||
e = EventLog( | ||
handlers=[handler], | ||
|
@@ -65,19 +65,15 @@ def test_drop_sensitive_properties(schema, schema_id, version, level, expected_p | |
e.register_schema(schema) | ||
|
||
event = { | ||
'nothing-exciting': 'hello, world', | ||
'id': 'test id', | ||
'email': 'test@example.com', | ||
'name': 'test name' | ||
} | ||
|
||
# Get a set of keys that should be missing | ||
dropped_props = set(event.keys()) - expected_props | ||
|
||
# Record event and read output | ||
e.record_event(schema_id, version, event) | ||
recorded_event = json.loads(sink.getvalue()) | ||
recorded_props = set(recorded_event.keys()) | ||
recorded_props = set([key for key in recorded_event if not key.startswith('__')]) | ||
|
||
assert expected_props == recorded_props | ||
|
||
# Assert that sensitive properties are dropped. | ||
assert len(dropped_props.intersection(recorded_props)) == 0 | ||
|