Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Caddy json log support #367

Merged
merged 3 commits into from
Jan 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 49 additions & 0 deletions import_logs.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,54 @@ def remove_ignored_groups(self, groups):
for group in groups:
del self.json[group]

class CaddyJsonFormat(BaseFormat):
    """Log format for Caddy JSON access logs (one JSON object per line).

    The most recently decoded line is kept in ``self.json``. ``get_all``
    adds the flat field names used by the importer (``date``, ``ip``,
    ``path``, ...) to that same dict, next to the raw Caddy keys.
    """

    def __init__(self, name):
        super(CaddyJsonFormat, self).__init__(name)
        # Decoded JSON object of the last line seen, or None.
        self.json = None
        self.date_format = '%Y-%m-%dT%H:%M:%S.%f'

    @staticmethod
    def _load_object(line):
        """Return *line* decoded as a JSON object, or None if it is not one."""
        try:
            decoded = json.loads(line)
        except (ValueError, TypeError, RecursionError):
            # ValueError covers JSONDecodeError; TypeError covers non-string
            # input; RecursionError covers pathologically nested documents.
            return None
        # A bare number, string or array is valid JSON but not a log entry.
        return decoded if isinstance(decoded, dict) else None

    def check_format_line(self, line):
        """Return True if *line* looks like a Caddy JSON access log entry.

        The line must decode to a JSON object containing the ``request``,
        ``user_id`` and ``resp_headers`` keys.
        """
        decoded = self._load_object(line)
        if decoded is None:
            return False
        self.json = decoded
        return all(key in decoded for key in ('request', 'user_id', 'resp_headers'))

    def match(self, line):
        """Decode *line*; return ``self`` on success, ``None`` otherwise.

        On failure ``self.json`` is reset to ``None``.
        """
        self.json = self._load_object(line)
        return self if self.json is not None else None

    def get(self, key):
        """Return the value stored under *key*, or None if it is absent.

        Raises:
            BaseFormatException: if a Caddy field required by ``get_all``
                is missing from the current entry.
        """
        try:
            return self.get_all().get(key)
        except KeyError:
            raise BaseFormatException()

    def get_all(self,):
        """Add the importer's field names to ``self.json`` and return it.

        Raises:
            KeyError: if a required Caddy field is missing.
        """
        entry = self.json
        request = entry['request']
        headers = request['headers']

        # 'ts' is interpreted as a POSIX timestamp and rendered in UTC.
        date = datetime.datetime.fromtimestamp(entry['ts'], tz=datetime.timezone.utc)
        entry['date'] = date.strftime(self.date_format)
        entry['timezone'] = date.strftime('%z')

        entry['length'] = str(entry['size'])
        entry['status'] = str(entry['status'])
        # presumably 'duration' is in seconds, hence the * 1000 -- confirm.
        entry['generation_time_milli'] = str(entry['duration'] * 1000.)
        entry['userid'] = entry['user_id']

        # Fall back to 'remote_ip' (logged next to 'client_ip' in the sample
        # log) when 'client_ip' is absent.
        # NOTE(review): older Caddy releases are believed to omit 'client_ip'
        # -- confirm against the Caddy logging documentation.
        if 'client_ip' in request:
            entry['ip'] = request['client_ip']
        else:
            entry['ip'] = request['remote_ip']
        entry['host'] = request['host']
        entry['method'] = request['method']
        entry['path'] = request['uri']

        # Header values are lists; take the first one, or None if absent/empty.
        entry['referrer'] = next(iter(headers.get('Referer', [])), None)
        entry['user_agent'] = next(iter(headers.get('User-Agent', [])), None)
        return entry

    def remove_ignored_groups(self, groups):
        """Delete every key named in *groups* from the current entry."""
        for group in groups:
            del self.json[group]

class RegexFormat(BaseFormat):

def __init__(self, name, regex, date_format=None):
Expand Down Expand Up @@ -590,6 +638,7 @@ def get(self, key):
'elb': RegexFormat('elb', _ELB_LOG_FORMAT, '%Y-%m-%dT%H:%M:%S'),
'traefik_json': TraefikJsonFormat('traefik_json'),
'nginx_json': NginxJsonFormat('nginx_json'),
'caddy_json': CaddyJsonFormat('caddy_json'),
'ovh': RegexFormat('ovh', _OVH_FORMAT),
'haproxy': RegexFormat('haproxy', _HAPROXY_FORMAT, '%d/%b/%Y:%H:%M:%S.%f'),
'gandi': RegexFormat('gandi', _GANDI_SIMPLE_HOSTING_FORMAT, '%d/%b/%Y:%H:%M:%S')
Expand Down
3 changes: 3 additions & 0 deletions tests/logs/caddy_json.log
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{"level":"info","ts":1703373474.8155608,"logger":"http.log.access","msg":"handled request","request":{"remote_ip":"1.2.3.4","remote_port":"64985","client_ip":"1.2.3.4","proto":"HTTP/2.0","method":"GET","host":"example.com","uri":"/beta/","headers":{"Te":["trailers"],"User-Agent":["Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:121.0) Gecko/20100101 Firefox/121.0"],"Accept":["text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/jxl,image/webp,*/*;q=0.8"],"Upgrade-Insecure-Requests":["1"],"Sec-Fetch-Mode":["navigate"],"Sec-Fetch-Dest":["document"],"Sec-Fetch-Site":["none"],"Sec-Fetch-User":["?1"],"Accept-Language":["en-IE"],"Accept-Encoding":["gzip, deflate, br"],"Dnt":["1"],"Sec-Gpc":["1"]},"tls":{"resumed":false,"version":772,"cipher_suite":4865,"proto":"h2","server_name":"example.com"}},"bytes_read":0,"user_id":"","duration":0.001335486,"size":3609,"status":200,"resp_headers":{"Content-Encoding":["gzip"],"Vary":["Accept-Encoding"],"Server":["Caddy","nginx/1.20.1"],"Alt-Svc":["h3=\":443\"; ma=2592000"],"Date":["Sat, 23 Dec 2023 23:17:54 GMT"],"Content-Type":["text/html"],"Access-Control-Allow-Methods":["GET"],"Access-Control-Allow-Origin":["*"]}}
{"level":"info","ts":1703373474.9011197,"logger":"http.log.access","msg":"handled request","request":{"remote_ip":"1.2.3.4","remote_port":"64985","client_ip":"1.2.3.4","proto":"HTTP/2.0","method":"GET","host":"example.com","uri":"/beta/assets/index-mMaLXldj.css","headers":{"Accept-Encoding":["gzip, deflate, br"],"Dnt":["1"],"Sec-Fetch-Dest":["style"],"User-Agent":["Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:121.0) Gecko/20100101 Firefox/121.0"],"Accept":["text/css,*/*;q=0.1"],"Referer":["https://example.com/beta/"],"Sec-Fetch-Mode":["cors"],"Sec-Fetch-Site":["same-origin"],"Te":["trailers"],"Accept-Language":["en-IE"],"Sec-Gpc":["1"]},"tls":{"resumed":false,"version":772,"cipher_suite":4865,"proto":"h2","server_name":"example.com"}},"bytes_read":0,"user_id":"","duration":0.00143684,"size":34534,"status":200,"resp_headers":{"Content-Length":["34534"],"Last-Modified":["Sat, 23 Dec 2023 12:27:10 GMT"],"Etag":["\"6586d21e-86e6\""],"Vary":["Accept-Encoding"],"Content-Encoding":["br"],"Accept-Ranges":["bytes"],"Date":["Sat, 23 Dec 2023 23:17:54 GMT"],"Content-Type":["text/css"],"Server":["Caddy","nginx/1.20.1"],"Alt-Svc":["h3=\":443\"; ma=2592000"]}}
{"level":"info","ts":1703373475.141868,"logger":"http.log.access","msg":"handled request","request":{"remote_ip":"1.2.3.4","remote_port":"64985","client_ip":"1.2.3.4","proto":"HTTP/2.0","method":"GET","host":"example.com","uri":"/beta/assets/index-HIcRLzdf.js","headers":{"Accept":["*/*"],"Accept-Language":["en-IE"],"Accept-Encoding":["gzip, deflate, br"],"Sec-Fetch-Dest":["script"],"Sec-Fetch-Site":["same-origin"],"Te":["trailers"],"User-Agent":["Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:121.0) Gecko/20100101 Firefox/121.0"],"Sec-Gpc":["1"],"Referer":["https://example.com/beta/"],"Sec-Fetch-Mode":["cors"],"Dnt":["1"]},"tls":{"resumed":false,"version":772,"cipher_suite":4865,"proto":"h2","server_name":"example.com"}},"bytes_read":0,"user_id":"","duration":0.242570094,"size":217912,"status":200,"resp_headers":{"Server":["Caddy","nginx/1.20.1"],"Alt-Svc":["h3=\":443\"; ma=2592000"],"Content-Type":["application/javascript"],"Accept-Ranges":["bytes"],"Content-Length":["217912"],"Etag":["\"6586d21e-35338\""],"Content-Encoding":["br"],"Vary":["Accept-Encoding"],"Date":["Sat, 23 Dec 2023 23:17:54 GMT"],"Last-Modified":["Sat, 23 Dec 2023 12:27:10 GMT"]}}
53 changes: 53 additions & 0 deletions tests/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,8 @@ def _test_junk(format_name, log_file = None):
assert(format.name == format_name)

def _test_multiple_spaces(format_name, log_file = None):
if format_name == 'caddy_json':
return
if log_file is None:
log_file = 'logs/%s.log' % format_name

Expand Down Expand Up @@ -416,6 +418,22 @@ def check_traefik_json_groups(groups):
assert groups['userid'] == '-'
assert groups['user_agent'] == 'Prometheus/2.40.5'

def check_caddy_json_groups(groups):
    """Assert that *groups* matches the first entry of the Caddy sample log.

    Raises AssertionError on the first mismatching field.
    """
    # Raw values taken unchanged from the JSON entry.
    assert groups['ts'] == 1703373474.8155608
    assert groups['duration'] == 0.001335486

    # Derived fields.
    assert groups['date'] == '2023-12-23T23:17:54.815561'
    assert groups['timezone'] == '+0000'
    assert groups['generation_time_milli'] == '1.3354860000000002'
    assert groups['host'] == 'example.com'
    assert groups['ip'] == '1.2.3.4'
    assert groups['length'] == '3609'
    assert groups['method'] == 'GET'
    assert groups['path'] == '/beta/'
    # The first sample request carries no Referer header.
    assert groups['referrer'] is None
    assert groups['status'] == '200'
    assert groups['userid'] == ''
    assert groups['user_agent'] == 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:121.0) Gecko/20100101 Firefox/121.0'

def check_icecast2_groups(groups):
check_ncsa_extended_groups(groups)

Expand Down Expand Up @@ -466,6 +484,41 @@ def _test_with_junk(format_name, path):
# 'Testing parsing of format "common" with ncsa_extended log'
_test( 'common', 'logs/ncsa_extended.log')

def test_caddy_json_parsing():
    """Parse the Caddy JSON sample log and verify the first recorded hit."""

    log_path = 'logs/caddy_json.log'

    # Reset the importer's module-level state before parsing.
    import_logs.stats = import_logs.Statistics()
    import_logs.config = Config()
    import_logs.config.options.enable_static = False
    import_logs.config.options.replay_tracking = False
    # No format is forced on the config for this run.
    import_logs.config.format = None
    import_logs.resolver = Resolver()
    import_logs.parser = import_logs.Parser()
    import_logs.Recorder = Recorder()
    Recorder.recorders = []
    import_logs.parser.parse(log_path)

    recorded = [vars(hit) for hit in Recorder.recorders]
    first = recorded[0]

    assert first['status'] == '200'
    assert first['is_error'] == False
    assert first['extension'] == '/beta/'
    assert first['is_download'] == False
    assert first['referrer'] == ''
    assert first['generation_time_milli'] == 1.3354860000000002
    assert first['host'] == 'foo'
    assert first['filename'] == 'logs/caddy_json.log'
    assert first['is_redirect'] == False
    assert first['date'] == datetime.datetime(2023, 12, 23, 23, 17, 54, 815561)
    assert first['lineno'] == 0
    assert first['ip'] == '1.2.3.4'
    assert first['path'] == '/beta/'
    assert first['is_robot'] == False
    assert first['full_path'] == '/beta/'
    assert first['user_agent'] == 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:121.0) Gecko/20100101 Firefox/121.0'

def test_iis_custom_format():
"""test IIS custom format name parsing."""

Expand Down
Loading