class CaddyJsonFormat(BaseFormat):
    """Log format for Caddy's structured (JSON) access log.

    Every log line is expected to be a single JSON object. The raw Caddy
    fields are kept and the group names the importer asks for (``date``,
    ``timezone``, ``length``, ``status``, ``generation_time_milli``,
    ``userid``, ``ip``, ``host``, ``method``, ``path``, ``referrer`` and
    ``user_agent``) are derived from them and stored in the same dictionary.

    An instance is registered in the module's format table under the name
    ``caddy_json``.
    """

    # Top-level keys whose presence identifies a Caddy access-log entry.
    _SIGNATURE_KEYS = ('request', 'user_id', 'resp_headers')

    def __init__(self, name):
        super(CaddyJsonFormat, self).__init__(name)
        self.json = None
        self.date_format = '%Y-%m-%dT%H:%M:%S.%f'
        # Per-line state, reset every time a new line is loaded.
        self._ignored_groups = set()
        self._groups_ready = False

    def _load(self, line):
        """Parse ``line`` into ``self.json``.

        Returns True only when the line decodes to a JSON object; any other
        outcome leaves ``self.json`` set to None.
        """
        self.json = None
        self._ignored_groups = set()
        self._groups_ready = False
        try:
            record = json.loads(line)
        except (TypeError, ValueError):
            # json.JSONDecodeError derives from ValueError; TypeError covers
            # input that is not str/bytes.
            return False
        if not isinstance(record, dict):
            # Valid JSON that is not an object (list, string, number, ...)
            # cannot be an access-log entry.
            return False
        self.json = record
        return True

    def check_format_line(self, line):
        """Return True when ``line`` looks like a Caddy JSON access-log entry."""
        if not self._load(line):
            return False
        return all(key in self.json for key in self._SIGNATURE_KEYS)

    def match(self, line):
        """Load ``line``; return ``self`` on success and None otherwise."""
        return self if self._load(line) else None

    def get(self, key):
        """Return the value of group ``key`` (None when the group is absent).

        Raises:
            BaseFormatException: when the entry lacks a field needed to build
                the groups, or such a field has an unexpected type.
        """
        try:
            return self.get_all().get(key)
        except (KeyError, TypeError, AttributeError):
            raise BaseFormatException()

    def get_all(self,):
        """Return the entry's dictionary, extended with the derived groups.

        The derived groups are computed once per loaded line, so groups
        dropped through ``remove_ignored_groups`` are not recreated by later
        calls.
        """
        if not self._groups_ready:
            self._derive_groups()
            self._drop_ignored_groups()
            self._groups_ready = True
        return self.json

    def _derive_groups(self):
        """Add the importer's group names to ``self.json``."""
        record = self.json
        request = record['request']
        headers = request['headers']

        # 'ts' is passed to fromtimestamp(); the date is rendered in UTC.
        moment = datetime.datetime.fromtimestamp(record['ts'], tz=datetime.timezone.utc)
        record['date'] = moment.strftime(self.date_format)
        record['timezone'] = moment.strftime('%z')
        record['length'] = str(record['size'])
        record['status'] = str(record['status'])
        record['generation_time_milli'] = str(record['duration'] * 1000.)
        record['userid'] = record['user_id']
        # Fall back to the peer address for entries without 'client_ip'
        # (presumably written by older Caddy releases -- TODO confirm).
        if 'client_ip' in request:
            record['ip'] = request['client_ip']
        else:
            record['ip'] = request['remote_ip']
        record['host'] = request['host']
        record['method'] = request['method']
        record['path'] = request['uri']
        record['referrer'] = self._first_header_value(headers, 'Referer')
        record['user_agent'] = self._first_header_value(headers, 'User-Agent')

    @staticmethod
    def _first_header_value(headers, name):
        """Return the first value listed for header ``name``, or None."""
        return next(iter(headers.get(name, [])), None)

    def _drop_ignored_groups(self):
        """Remove every group recorded as ignored from ``self.json``."""
        for group in self._ignored_groups:
            self.json.pop(group, None)

    def remove_ignored_groups(self, groups):
        """Drop ``groups`` from the current entry.

        Unknown group names are skipped. When the derived groups have not
        been computed yet the removal is deferred until ``get_all`` runs, so
        that raw fields needed for the derivation are still available.
        """
        self._ignored_groups.update(groups)
        if self._groups_ready:
            self._drop_ignored_groups()
br"],"Dnt":["1"],"Sec-Gpc":["1"]},"tls":{"resumed":false,"version":772,"cipher_suite":4865,"proto":"h2","server_name":"example.com"}},"bytes_read":0,"user_id":"","duration":0.001335486,"size":3609,"status":200,"resp_headers":{"Content-Encoding":["gzip"],"Vary":["Accept-Encoding"],"Server":["Caddy","nginx/1.20.1"],"Alt-Svc":["h3=\":443\"; ma=2592000"],"Date":["Sat, 23 Dec 2023 23:17:54 GMT"],"Content-Type":["text/html"],"Access-Control-Allow-Methods":["GET"],"Access-Control-Allow-Origin":["*"]}} +{"level":"info","ts":1703373474.9011197,"logger":"http.log.access","msg":"handled request","request":{"remote_ip":"1.2.3.4","remote_port":"64985","client_ip":"1.2.3.4","proto":"HTTP/2.0","method":"GET","host":"example.com","uri":"/beta/assets/index-mMaLXldj.css","headers":{"Accept-Encoding":["gzip, deflate, br"],"Dnt":["1"],"Sec-Fetch-Dest":["style"],"User-Agent":["Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:121.0) Gecko/20100101 Firefox/121.0"],"Accept":["text/css,*/*;q=0.1"],"Referer":["https://example.com/beta/"],"Sec-Fetch-Mode":["cors"],"Sec-Fetch-Site":["same-origin"],"Te":["trailers"],"Accept-Language":["en-IE"],"Sec-Gpc":["1"]},"tls":{"resumed":false,"version":772,"cipher_suite":4865,"proto":"h2","server_name":"example.com"}},"bytes_read":0,"user_id":"","duration":0.00143684,"size":34534,"status":200,"resp_headers":{"Content-Length":["34534"],"Last-Modified":["Sat, 23 Dec 2023 12:27:10 GMT"],"Etag":["\"6586d21e-86e6\""],"Vary":["Accept-Encoding"],"Content-Encoding":["br"],"Accept-Ranges":["bytes"],"Date":["Sat, 23 Dec 2023 23:17:54 GMT"],"Content-Type":["text/css"],"Server":["Caddy","nginx/1.20.1"],"Alt-Svc":["h3=\":443\"; ma=2592000"]}} +{"level":"info","ts":1703373475.141868,"logger":"http.log.access","msg":"handled 
def check_caddy_json_groups(groups):
    """Assert the groups extracted from the first entry of logs/caddy_json.log."""
    # Raw Caddy fields are kept as parsed from JSON.
    assert groups['ts'] == 1703373474.8155608
    assert groups['duration'] == 0.001335486
    # Derived groups are strings, except for header values that are missing.
    assert groups['date'] == '2023-12-23T23:17:54.815561'
    assert groups['timezone'] == '+0000'
    assert groups['generation_time_milli'] == '1.3354860000000002'
    assert groups['host'] == 'example.com'
    assert groups['ip'] == '1.2.3.4'
    assert groups['length'] == '3609'
    assert groups['method'] == 'GET'
    assert groups['path'] == '/beta/'
    # The first entry carries no Referer header.
    assert groups['referrer'] is None
    assert groups['status'] == '200'
    assert groups['userid'] == ''
    assert groups['user_agent'] == 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:121.0) Gecko/20100101 Firefox/121.0'


def test_caddy_json_parsing():
    """test parsing of caddy_json.log file"""

    file_ = 'logs/caddy_json.log'

    # Reset the importer's module-level state; the format is left unset so
    # the parser has to determine it from the file.
    import_logs.stats = import_logs.Statistics()
    import_logs.config = Config()
    import_logs.config.options.enable_static = False
    import_logs.config.options.replay_tracking = False
    import_logs.config.format = None
    import_logs.resolver = Resolver()
    import_logs.parser = import_logs.Parser()
    import_logs.Recorder = Recorder()
    Recorder.recorders = []
    import_logs.parser.parse(file_)

    hits = [hit.__dict__ for hit in Recorder.recorders]
    first = hits[0]

    assert first['status'] == '200'
    assert first['is_error'] is False
    assert first['extension'] == '/beta/'
    assert first['is_download'] is False
    assert first['referrer'] == ''
    assert first['generation_time_milli'] == 1.3354860000000002
    # NOTE(review): 'foo' does not appear in the log entry; presumably it is
    # supplied by the test Config/Resolver -- confirm.
    assert first['host'] == 'foo'
    assert first['filename'] == 'logs/caddy_json.log'
    assert first['is_redirect'] is False
    assert first['date'] == datetime.datetime(2023, 12, 23, 23, 17, 54, 815561)
    assert first['lineno'] == 0
    assert first['ip'] == '1.2.3.4'
    assert first['path'] == '/beta/'
    assert first['is_robot'] is False
    assert first['full_path'] == '/beta/'
    assert first['user_agent'] == 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:121.0) Gecko/20100101 Firefox/121.0'