Skip to content

Commit

Permalink
replace multi-level approach with tagging approach
Browse files Browse the repository at this point in the history
  • Loading branch information
Zsailer committed May 13, 2020
1 parent 64088bf commit 2b17927
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 67 deletions.
6 changes: 1 addition & 5 deletions jupyter_telemetry/eventlog.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,17 +116,13 @@ def register_schema(self, schema):
jsonschema.validators.validator_for(schema).check_schema(schema)

# Check that the properties we require are present
required_schema_fields = {'$id', 'version'}
required_schema_fields = {'$id', 'version', 'properties'}
for rsf in required_schema_fields:
if rsf not in schema:
raise ValueError(
'{} is required in schema specification'.format(rsf)
)

# Verify that all properties have a sensitivity level.
if


# Make sure reserved, auto-added fields are not in schema
if any([p.startswith('__') for p in schema['properties']]):
raise ValueError(
Expand Down
72 changes: 34 additions & 38 deletions jupyter_telemetry/formatter.py
Original file line number Diff line number Diff line change
@@ -1,62 +1,58 @@
from traitlets import HasTraits, validate, Set
from pythonjsonlogger import jsonlogger


EVENT_MAP = {
'unrestricted': 0,
'user-identifier': 10,
'user-identifiable-information': 20
}

EVENT_LEVELS = list(EVENT_MAP.keys())


class JsonEventFormatter(jsonlogger.JsonFormatter):
"""Patch the jsonlogger formatter to include levels for telemetry.
Properties in a logged event whose tag is neither 'unrestricted' nor listed in
the handler's allowed_tags will be dropped from the emitted event.
"""

def __init__(self, logger, handler, *args, **kwargs):
self.logger = logger
self.handler = handler
# Set the event logging level
self.event_level = getattr(handler, 'event_level', None)
super(JsonEventFormatter, self).__init__(*args, **kwargs)

# Protect the event_level attribute.
@property
def event_level(self):
"""Event Log security level."""
return self._event_level
def allowed_tags(self):
return getattr(self.handler, 'allowed_tags', {})

@event_level.setter
def event_level(self, event_level):
# Check that the event level makes sense.
if event_level not in EVENT_LEVELS:
raise Exception("Event level '{}' not understood.".format(event_level))
self._event_level = self.handler.event_level
@property
def hashed_tags(self):
    """Tags whose property values are passed through ``hash_property``.

    Read from the handler's ``hashed_tags`` attribute on every access;
    an empty dict is returned when the handler does not define it.
    """
    handler = self.handler
    try:
        return handler.hashed_tags
    except AttributeError:
        # Handler was not configured with hashed tags.
        return {}

def process_log_record(self, log_record):
log_record = super(JsonEventFormatter, self).process_log_record(log_record)
return self.process_event_levels(log_record)
return self.process_tags(log_record)

def process_event_levels(self, log_record):
"""Removes any properties in a log_record that have an attribute `pii = True`.
def drop_property(self, key, record):
    """Remove ``key`` from ``record`` in place and return ``record``.

    Raises ``KeyError`` when ``key`` is not present in ``record``.
    """
    # The removed value is intentionally discarded.
    record.pop(key)
    return record

def hash_property(self, key, record):
    """Replace ``record[key]`` with a SHA-256 hex digest of its value.

    The previous implementation used an identity function as the "hash",
    so values tagged for hashing were emitted unchanged.

    Parameters
    ----------
    key : str
        Name of the property in ``record`` to hash.
    record : dict
        The log record; it is modified in place.

    Returns
    -------
    dict
        The same ``record`` object, with ``record[key]`` replaced by a
        hexadecimal digest string.

    Raises
    ------
    KeyError
        If ``key`` is not present in ``record``.
    """
    # Imported locally so this method does not depend on module-level imports.
    import hashlib

    value = record[key]
    # hashlib works on bytes; non-bytes values are stringified and encoded
    # as UTF-8 so equal values always map to the same digest.
    if not isinstance(value, bytes):
        value = str(value).encode('utf-8')
    # NOTE(review): an unsalted digest only pseudonymises the value; low-entropy
    # inputs can still be guessed. Confirm whether a keyed hash is required.
    record[key] = hashlib.sha256(value).hexdigest()
    return record

def process_tags(self, log_record):
"""
# Get schema for this log_record
key = (log_record['__schema__'], log_record['__version__'])
"""
# Registered schemas are identified by their name and version.
key = (log_record['__schema__'], log_record['__schema_version__'])
schema = self.logger.schemas[key]['properties']

# Find all properties that have a level less than the handler.
keys = list(log_record.keys())
for key in keys:
# Ignore any keys that start with __
if not key.startswith('__'):
# Check if security level is listed in the schema.
if EVENT_MAP[schema[key]['level']] > EVENT_MAP[self.event_level]:
# If property's level is less than handler's level,
# delete this property from the log record.
del log_record[key]
props = [key for key in log_record.keys()
if not key.startswith('__') and key != 'message']

# Walk through the recorded event and handle each key
# based on its tag/category.
for key in props:
tag = schema[key]['tag']
if tag in self.allowed_tags or tag == "unrestricted":
# If the tag is found in the allowed_tags trait, do nothing.
if tag in self.hashed_tags:
log_record = self.hash_property(key, log_record)
# Drop tags not listed in allowed_tags
else:
log_record = self.drop_property(key, log_record)

return log_record
44 changes: 20 additions & 24 deletions tests/test_levels.py → tests/test_tags.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,39 +24,39 @@ def schema(schema_id, version):
'description': 'Test Event.',
'type': 'object',
'properties': {
'nothing-exciting': {
'description': 'a property with nothing exciting happening',
'tag': 'unrestricted',
'type': 'string'
},
'id': {
'description': 'user ID',
'level': 'confidential',
'description': 'user ID',
'tag': 'user-identifier',
'type': 'string'
},
'email': {
'description': 'email address',
'level': 'secret',
'tag': 'user-identifiable-information',
'type': 'string'
},
'name': {
'description': 'name of user',
'level': 'top_secret',
'type': 'string'
}
}
}


@pytest.mark.parametrize(
'level,expected_props',
'tags,expected_props',
[
('unclassified', set()),
('confidential', {'id'}),
('secret', {'id', 'email'}),
('top_secret', {'id', 'email', 'name'})
({'unrestricted'}, {'nothing-exciting'}),
({'user-identifier'}, {'nothing-exciting', 'id'}),
({'user-identifiable-information'}, {'nothing-exciting', 'email'})
]
)
def test_drop_sensitive_properties(schema, schema_id, version, level, expected_props):
def test_properties_tags(schema, schema_id, version, tags, expected_props):
sink = io.StringIO()

# Create a handler that captures+records events with allowed tags.
handler = logging.StreamHandler(sink)
# Set the event level
handler.event_level = level
handler.allowed_tags = tags

e = EventLog(
handlers=[handler],
Expand All @@ -65,19 +65,15 @@ def test_drop_sensitive_properties(schema, schema_id, version, level, expected_p
e.register_schema(schema)

event = {
'nothing-exciting': 'hello, world',
'id': 'test id',
'email': '[email protected]',
'name': 'test name'
}

# Get a set of keys that should be missing
dropped_props = set(event.keys()) - expected_props

# Record event and read output
e.record_event(schema_id, version, event)
recorded_event = json.loads(sink.getvalue())
recorded_props = set(recorded_event.keys())
recorded_props = set([key for key in recorded_event if not key.startswith('__')])

assert expected_props == recorded_props

# Assert that sensitive properties are dropped.
assert len(dropped_props.intersection(recorded_props)) == 0

0 comments on commit 2b17927

Please sign in to comment.