#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""EventMonkey (A Windows Event Parsing Utility) - command-line entry point."""
import sys
import argparse
import logging
import multiprocessing

import libem.WindowsEventManager as WindowsEventManager
import libem.Config as Config

# Loggers must exist before any library module asks for one, and the UI mode
# controls how progress is rendered (CLI progress bars for this entry point).
Config.Config.InitLoggers()
Config.Config.SetUiToCLI()

MAIN_LOGGER = logging.getLogger('Main')


def GetArguements():
    '''Build the option parser holding all options needed for processing.

    Returns:
        argparse.ArgumentParser: parser for the supported command-line options
    '''
    usage = '''EventMonkey (A Windows Event Parsing Utility)'''

    arguements = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=usage
    )

    arguements.add_argument(
        '-n', '--evidencename',
        dest='evidencename',
        required=True,
        action="store",
        type=unicode,
        # BUGFIX: help text previously duplicated the --path option's text
        # ("Path to Event Files"); this option names the evidence.
        help='Evidence name (used to name the output database and index)'
    )

    arguements.add_argument(
        '-p', '--path',
        dest='events_path',
        required=True,
        action="store",
        type=unicode,
        help='Path to Event Files'
    )

    arguements.add_argument(
        '-o', '--output_path',
        dest='output_path',
        required=True,
        action="store",
        type=unicode,
        help='Output Path'
    )

    arguements.add_argument(
        '--threads',
        dest='threads_to_use',
        action="store",
        type=int,
        default=Config.Config.CPU_COUNT,
        help='Number of threads to use (default is all [{}])'.format(Config.Config.CPU_COUNT)
    )

    arguements.add_argument(
        '--eshost',
        dest='eshost',
        action="store",
        type=str,
        default=None,
        help='Elastic Host IP'
    )

    return arguements


def Main():
    '''Parse command-line options and kick off Windows event processing.'''
    # Required for frozen (e.g. pyinstaller) Windows builds that spawn
    # worker processes; a no-op everywhere else.
    multiprocessing.freeze_support()

    options = GetArguements().parse_args()

    manager = WindowsEventManager.WindowsEventManager(options)
    manager.ProcessEvents()


if __name__ == '__main__':
    Main()
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. 
Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative 
Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright {yyyy} {name of copyright owner} + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ diff --git a/etc/evtx.mapping.json b/etc/evtx.mapping.json new file mode 100644 index 0000000..ef3bee2 --- /dev/null +++ b/etc/evtx.mapping.json @@ -0,0 +1,515 @@ +"mappings": { + "winevent": { + "properties": { + "creation_time": { + "format": "strict_date_optional_time||epoch_millis", + "type": "date" + }, + "computer_name": { + "type": "string" + }, + "identifier": { + "type": "long" + }, + "index_timestamp": { + "format": "strict_date_optional_time||epoch_millis", + "type": "date" + }, + "offset": { + "type": "long" + }, + "written_time": { + "format": "strict_date_optional_time||epoch_millis", + "type": "date" + }, + "UserData": { + "properties": { + "RmRestartEvent": { + "properties": { + "Applications": { + "properties": { + "Application": { + "properties": { + "#text": { + "type": "string" + } + } + } + } + }, + "RmSessionId": { + "properties": { + "#text": { + "type": "string" + } + } + }, + "nApplications": { + "properties": { + "#text": { + "type": "string" + } + } + }, + "RebootReasons": { + "properties": { + "#text": { + "type": "string" + } + } + } + } + }, + "EventXML": { + "properties": { + "binaryData": { + "properties": { + "#text": { + "type": "string" + } + } + }, + "binaryDataSize": { + "properties": { + "#text": { + "type": "string" + } + } + }, + "param1": { + "properties": { + "#text": { + "type": "string" + } + } + }, + "param2": { + "properties": { + "#text": { + "type": "string" + } + } + } + } + }, + "HungAppEvent": { + "properties": { + "AppName": { + "properties": { + "#text": { + "type": "string" + } + } + } + } + }, + "RmApplicationEvent": { + "properties": { + "AppVersion": { + "properties": { + "#text": { + "type": "string" + } + } + }, + "Status": { + "properties": { + "#text": { + "type": "string" + } + } + }, + "RmSessionId": { + "properties": { + "#text": { + "type": "string" + } + } + }, + "TSSessionId": { + "properties": { + "#text": { + "type": "string" + } + } + }, + "DisplayName": { + "properties": { + "#text": { + 
"type": "string" + } + } + }, + "nFiles": { + "properties": { + "#text": { + "type": "string" + } + } + }, + "FullPath": { + "properties": { + "#text": { + "type": "string" + } + } + }, + "Pid": { + "properties": { + "#text": { + "type": "string" + } + } + }, + "Files": { + "properties": { + "File": { + "properties": { + "#text": { + "type": "string" + } + } + } + } + }, + "AppType": { + "properties": { + "#text": { + "type": "string" + } + } + } + } + }, + "RmSessionEvent": { + "properties": { + "RmSessionId": { + "properties": { + "#text": { + "type": "string" + } + } + }, + "UTCStartTime": { + "properties": { + "#text": { + "format": "strict_date_optional_time||epoch_millis", + "type": "date" + } + } + } + } + }, + "RmUnsupportedRestartEvent": { + "properties": { + "AppVersion": { + "properties": { + "#text": { + "type": "string" + } + } + }, + "Status": { + "properties": { + "#text": { + "type": "string" + } + } + }, + "RmSessionId": { + "properties": { + "#text": { + "type": "string" + } + } + }, + "TSSessionId": { + "properties": { + "#text": { + "type": "string" + } + } + }, + "DisplayName": { + "properties": { + "#text": { + "type": "string" + } + } + }, + "FullPath": { + "properties": { + "#text": { + "type": "string" + } + } + }, + "Pid": { + "properties": { + "#text": { + "type": "string" + } + } + }, + "AppType": { + "properties": { + "#text": { + "type": "string" + } + } + }, + "Reason": { + "properties": { + "#text": { + "type": "string" + } + } + } + } + }, + "VetoAppEvent": { + "properties": { + "ResponseTime": { + "properties": { + "#text": { + "type": "string" + } + } + }, + "AppName": { + "properties": { + "#text": { + "type": "string" + } + } + } + } + }, + "data_0x8000003F": { + "properties": { + "Namespace": { + "properties": { + "#text": { + "type": "string" + } + } + }, + "Provider": { + "properties": { + "#text": { + "type": "string" + } + } + } + } + }, + "LocationEvent": { + "properties": { + "FriendlyName": { + "properties": { + "#text": 
{ + "type": "string" + } + } + }, + "Username": { + "properties": { + "#text": { + "type": "string" + } + } + }, + "ImagePath": { + "properties": { + "#text": { + "type": "string" + } + } + }, + "PID": { + "properties": { + "#text": { + "type": "string" + } + } + }, + "SID": { + "properties": { + "#text": { + "type": "string" + } + } + } + } + } + } + }, + "index": { + "type": "long" + }, + "event_category": { + "type": "long" + }, + "event_identifier": { + "type": "long" + }, + "EventData": { + "properties": { + "Data": { + "properties": { + "#text": { + "type": "string" + }, + "Name": { + "type": "string" + } + } + }, + "Binary": { + "properties": { + "#text": { + "type": "string" + } + } + }, + "Name": { + "type": "string" + } + } + }, + "System": { + "properties": { + "Correlation": { + "properties": { + "ActivityID": { + "type": "string" + } + } + }, + "Task": { + "properties": { + "#text": { + "type": "string" + } + } + }, + "Keywords": { + "properties": { + "#text": { + "type": "string" + } + } + }, + "Channel": { + "properties": { + "#text": { + "type": "string" + } + } + }, + "Opcode": { + "properties": { + "#text": { + "type": "string" + } + } + }, + "Security": { + "properties": { + "UserID": { + "type": "string" + } + } + }, + "Provider": { + "properties": { + "EventSourceName": { + "type": "string" + }, + "Guid": { + "type": "string" + }, + "Name": { + "type": "string" + } + } + }, + "TimeCreated": { + "properties": { + "SystemTime": { + "format": "strict_date_optional_time||epoch_millis", + "type": "date" + } + } + }, + "EventRecordID": { + "properties": { + "#text": { + "type": "string" + } + } + }, + "Execution": { + "properties": { + "ThreadID": { + "type": "string" + }, + "ProcessID": { + "type": "string" + } + } + }, + "Version": { + "properties": { + "#text": { + "type": "string" + } + } + }, + "Computer": { + "properties": { + "#text": { + "type": "string" + } + } + }, + "EventID": { + "properties": { + "#text": { + "type": "string" + }, + 
"Qualifiers": { + "type": "string" + } + } + }, + "Level": { + "properties": { + "#text": { + "type": "string" + } + } + } + } + }, + "eventfile_type": { + "type": "string" + }, + "recovered": { + "type": "boolean" + }, + "xmlns": { + "type": "string" + }, + "strings": { + "type": "string" + }, + "source_filename": { + "type": "string" + }, + "source_name": { + "type": "string" + } + } + } +} \ No newline at end of file diff --git a/etc/log_config.json b/etc/log_config.json new file mode 100644 index 0000000..95cb38f --- /dev/null +++ b/etc/log_config.json @@ -0,0 +1,110 @@ +{ + "version" : 1, + "disable_existing_loggers": true, + "formatters" : { + "simple": { + "format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s" + }, + "custom1":{ + "format": "%(name)s|%(levelname)s|%(module)s|%(funcName)s|%(lineno)d|%(message)s" + } + }, + "handlers" : { + "console": { + "class": "logging.StreamHandler", + "level": "DEBUG", + "formatter": "simple", + "stream": "ext://sys.stdout" + }, + "main_handler": { + "class": "logging.handlers.RotatingFileHandler", + "level": "DEBUG", + "formatter": "custom1", + "filename": "logs/main.log", + "maxBytes": 10485760, + "backupCount": 20, + "encoding": "utf8" + }, + "database_error_handler": { + "class": "logging.handlers.RotatingFileHandler", + "level": "DEBUG", + "formatter": "custom1", + "filename": "logs/dbhandler.log", + "maxBytes": 10485760, + "backupCount": 20, + "encoding": "utf8" + }, + "elastic_handler": { + "class": "logging.handlers.RotatingFileHandler", + "level": "DEBUG", + "formatter": "custom1", + "filename": "logs/elastic.log", + "maxBytes": 10485760, + "backupCount": 20, + "encoding": "utf8" + }, + "progress_manager_handler":{ + "class": "logging.handlers.RotatingFileHandler", + "level": "DEBUG", + "formatter": "custom1", + "filename": "logs/progressbar.log", + "maxBytes": 10485760, + "backupCount": 20, + "encoding": "utf8" + }, + "xml_handler":{ + "class": "logging.handlers.RotatingFileHandler", + "level": 
"DEBUG", + "formatter": "custom1", + "filename": "logs/xmlhandler.log", + "maxBytes": 10485760, + "backupCount": 20, + "encoding": "utf8" + }, + "windows_event_handler":{ + "class": "logging.handlers.RotatingFileHandler", + "level": "DEBUG", + "formatter": "custom1", + "filename": "logs/windowsevents.log", + "maxBytes": 10485760, + "backupCount": 20, + "encoding": "utf8" + } + }, + "loggers": { + "DbHandler": { + "level": "INFO", + "handlers": ["database_error_handler"], + "propagate": false + }, + "ProgressManager": { + "level": "INFO", + "handlers": ["progress_manager_handler"], + "propagate": false + }, + "ElasticHandler": { + "level": "INFO", + "handlers": ["elastic_handler"], + "propagate": false + }, + "XmlHandler": { + "level": "INFO", + "handlers": ["xml_handler"], + "propagate": false + }, + "Main": { + "level": "INFO", + "handlers": ["main_handler"], + "propagate": false + }, + "WinEvent": { + "level": "INFO", + "handlers": ["windows_event_handler"], + "propagate": false + } + }, + "root" : { + "level": "INFO", + "propagate": false + } +} \ No newline at end of file diff --git a/libem/Config.py b/libem/Config.py new file mode 100644 index 0000000..9c0270e --- /dev/null +++ b/libem/Config.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +import time +import datetime +import multiprocessing +import os +import json +import logging +from logging.config import dictConfig + +class Config(): + UI_CLI = 0 + UI_GUI = 1 + UI_TYPE = None + + VERSION = '0.01' + STARTTIME = time.time() + CPU_COUNT = multiprocessing.cpu_count() + CPU_USE = CPU_COUNT + DEV_FLAG = False + + BUILD_DATETIME = datetime.datetime( + 2016, #Year + 4, #Month + 27 #Day + ) + + @staticmethod + def InitLoggers(path='etc/log_config.json'): + # Make sure logdir exists# + if not os.path.exists('logs'): + os.makedirs('logs') + + if os.path.exists(path): + with open(path, 'rb') as f: + config = json.load(f) + logging.config.dictConfig(config) + else: + raise Exception('No json log 
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Thin database abstraction used by EventMonkey (sqlite is the only
supported backend in this version)."""
import sqlite3
import logging

DB_LOGGER = logging.getLogger('DbHandler')


def dict_factory(cursor, row):
    '''sqlite3 row factory mapping each row to {column_name: value}.'''
    d = {}
    for idx, col in enumerate(cursor.description):
        d[col[0]] = row[idx]
    return d


class DbConfig():
    '''Connection settings plus a factory for DbHandler objects.'''

    def __init__(self, db_type=None, db=None, host=None, port=None, user=None, passwd=None):
        self.db_type = db_type  # currently only 'sqlite' is supported
        self.db = db            # sqlite: path to the database file
        self.host = host        # unused for sqlite; reserved for server DBs
        self.port = port
        self.user = user
        self.passwd = passwd

    def GetDbHandle(self):
        '''Return a DbHandler bound to this configuration.'''
        return DbHandler(self)


class DbHandler():
    '''Executes DDL/DML against the configured database.

    NOTE: GetDbHandle opens a NEW low-level connection per operation, so a
    file-backed database must be used for state to persist across calls
    (an in-memory sqlite db would vanish between operations).
    '''

    def __init__(self, db_config, table=None):
        self.db_config = db_config

    def InitDb(self):
        pass

    def CreateTableFromMapping(self, tbl_name, field_mapping, primary_key_str, field_order):
        '''Create tbl_name (if missing) from {field: sql_type} pairs.

        Args:
            tbl_name: table to create
            field_mapping: dict of field name -> SQL type string
            primary_key_str: e.g. 'PRIMARY KEY (col)', or None for no key clause
            field_order: list giving the column order
        '''
        dbh = self.GetDbHandle()

        sql = "CREATE TABLE IF NOT EXISTS {0:s} (\n".format(tbl_name)
        for field in field_order:
            sql += "{0:s} {1:s},\n".format(field, field_mapping[field])

        if primary_key_str is not None:
            sql += primary_key_str
        else:
            sql = sql[0:-2]  # strip the trailing ',\n'

        sql += ')'

        cursor = dbh.cursor()
        cursor.execute(sql)
        dbh.commit()  # persist the DDL regardless of isolation level

    def CreateInsertString(self, table, row, column_order, INSERT_STR=None):
        '''Build a parameterized (qmark-style) INSERT statement.

        `row` is only used to size the placeholder list; values are bound by
        the caller. INSERT_STR overrides the verb (default 'INSERT OR IGNORE').
        '''
        columns = ', '.join(column_order)

        in_row = []
        if isinstance(column_order, dict):
            for key in column_order:
                in_row.append(row[key])
        elif isinstance(column_order, list):
            if isinstance(row, list):
                for cnt, key in enumerate(column_order):
                    in_row.append(row[cnt])
            else:
                for key in column_order:
                    in_row.append(row[key])

        if self.db_config.db_type != 'sqlite':
            raise Exception('Unsupported db type')

        placeholders = ','.join('?' * len(in_row))

        if INSERT_STR is None:
            INSERT_STR = 'INSERT OR IGNORE'

        return '{} INTO {} ({}) VALUES ({})'.format(INSERT_STR, table, columns, placeholders)

    def InsertFromListOfLists(self, table, rows_to_insert, column_order, INSERT_STR=None):
        '''Insert rows (each a list ordered like column_order) in one transaction.'''
        dbh = self.GetDbHandle()
        sql_c = dbh.cursor()

        # column_order doubles as the row template: only its length matters
        # when building the placeholder list.
        sql = self.CreateInsertString(
            table,
            column_order,
            column_order,
            INSERT_STR=INSERT_STR  # BUGFIX: was hard-coded to None, dropping the caller's verb
        )

        for row in rows_to_insert:
            sql_c.execute(sql, row)

        dbh.commit()

    def InsertFromListOfDicts(self, table, rows_to_insert, column_order, INSERT_STR=None):
        '''Insert dict rows; rows that fail to bind are logged and skipped.'''
        dbh = self.GetDbHandle()
        sql_c = dbh.cursor()

        for row in rows_to_insert:
            sql = self.CreateInsertString(
                table,
                row,
                column_order,
                INSERT_STR=INSERT_STR  # BUGFIX: was hard-coded to None, dropping the caller's verb
            )
            in_row = [row[key] for key in column_order]
            try:
                sql_c.execute(sql, in_row)
            except Exception as e:
                DB_LOGGER.error("[ERROR] {}\n[SQL] {}\n[ROW] {}".format(str(e), sql, str(row)))

        dbh.commit()

    def DropTable(self, tbl_name):
        '''Drop tbl_name if it exists; failures are logged, never raised.'''
        dbh = self.GetDbHandle()
        cursor = dbh.cursor()
        try:
            cursor.execute("DROP TABLE IF EXISTS {0:s}".format(tbl_name))
        except sqlite3.Error as e:
            # BUGFIX: was a bare `except: pass` that hid every error silently.
            DB_LOGGER.error('DropTable({}) failed: {}'.format(tbl_name, str(e)))
        dbh.commit()

    def CreateView(self, view_str):
        '''Execute an arbitrary CREATE VIEW statement; failures are logged.'''
        dbh = self.GetDbHandle()
        cursor = dbh.cursor()
        try:
            cursor.execute(view_str)
            dbh.commit()
        except Exception as e:
            # BUGFIX: was logging to the root logger instead of this module's.
            DB_LOGGER.error(str(e))

    def GetRecordCount(self, table):
        '''Get the record count for a given table'''
        dbh = self.GetDbHandle()
        sql_c = dbh.cursor()

        sql_c.execute('SELECT COUNT(*) FROM {}'.format(table))
        row = sql_c.fetchone()

        return row[0]

    def GetDbHandle(self):
        '''Create a low-level database handle based off of the config.

        Returns None for unsupported db types (callers assume sqlite).
        '''
        dbh = None

        if self.db_config.db_type == 'sqlite':
            dbh = sqlite3.connect(
                self.db_config.db,
                # NOTE(review): timeout is in seconds; 10000 looks like it was
                # meant to be milliseconds — confirm intent before changing.
                timeout=10000
            )

        return dbh

    def FetchRecords(self, sql_string, row_factory=None):
        '''Generator yielding (column_names, row) for each result row.

        Args:
            sql_string: SQL statement to execute
            row_factory: pass `dict` to receive dict rows; anything else
                yields sqlite3.Row objects

        Yields:
            (list of column names, row)
        '''
        if self.db_config.db_type != 'sqlite':
            # BUGFIX: db_config is an object, not a mapping — was
            # self.db_config['db_type'], which raised TypeError instead.
            raise Exception('Unknown Database Type {}'.format(self.db_config.db_type))

        dbh = self.GetDbHandle()

        # BUGFIX: was `row_factory == type(dict)` which compares against the
        # metaclass `type` and could never match a `dict` argument.
        if row_factory is dict:
            dbh.row_factory = dict_factory
        else:
            dbh.row_factory = sqlite3.Row

        sql_c = dbh.cursor()
        sql_c.execute(sql_string)

        column_names = [desc[0] for desc in sql_c.description]

        for record in sql_c:
            yield column_names, record

    def GetDbTransaction(self):
        '''Return a DbTransaction sharing this handler's configuration.'''
        return DbTransaction(self)


class DbTransaction():
    '''Accumulates inserts on one connection; Commit() flushes and closes it.'''

    def __init__(self, dbHandler):
        self.dbHandler = dbHandler
        self.dbh = self.dbHandler.GetDbHandle()
        self.cur = self.dbh.cursor()

    def InsertDict(self, table, row, or_str='', column_order=None):
        '''Queue one dict row for insert; column_order selects/orders columns.'''
        if self.dbHandler.db_config.db_type != 'sqlite':
            raise Exception('Unknown DB Type {}'.format(
                self.dbHandler.db_config.db_type))

        if column_order is None:
            columns = ', '.join(row.keys())
            placeholders = ':' + ', :'.join(row.keys())
            query = 'INSERT %s INTO %s (%s) VALUES (%s)' % (or_str, table, columns, placeholders)

            # BUGFIX: named placeholders must be bound from a mapping; the old
            # code passed row.values(), which sqlite3 deprecates (3.12) and
            # rejects (3.14) for named-style parameters.
            self.cur.execute(query, row)
        else:
            query = self.CreateInsertString(
                table,
                row,
                column_order,
                INSERT_STR=None
            )
            in_row = [row.get(key) for key in column_order]
            self.cur.execute(query, in_row)

    def CreateInsertString(self, table, row, column_order, INSERT_STR=None):
        '''Build a qmark-style INSERT; mirrors DbHandler.CreateInsertString.'''
        columns = ', '.join(column_order)

        in_row = [row.get(key, None) for key in column_order]

        if self.dbHandler.db_config.db_type != 'sqlite':
            raise Exception('Unsupported db type')

        placeholders = ','.join('?' * len(in_row))

        if INSERT_STR is None:
            INSERT_STR = 'INSERT OR IGNORE'

        return '{} INTO {} ({}) VALUES ({})'.format(INSERT_STR, table, columns, placeholders)

    def Commit(self):
        '''Commit all queued work and close the underlying connection.'''
        self.dbh.commit()
        self.dbh.close()
Config.UI_CLI: + self.current_value = value + self.progressBar.update(self.current_value) + else: + PROGRESS_LOGGER.error('interface type not handled: {}'.format(self.interface_type)) + raise Exception('interface type not handled: {}'.format(self.interface_type)) + + def Increment(self,increment): + if self.interface_type == Config.UI_CLI: + self.current_value+=increment + try: + self.progressBar.update(self.current_value) + except: + pass + else: + PROGRESS_LOGGER.error('interface type not handled: {}'.format(self.interface_type)) + raise Exception('interface type not handled: {}'.format(self.interface_type)) + + def Finish(self): + if self.interface_type == Config.UI_CLI: + self.progressBar.finish() + + def Close(self): + if self.interface_type == Config.UI_CLI: + pass + +class ProgressManager(BaseManager): + value = 0 + +ProgressManager.register('ProgressBar', ProgressBarClass) diff --git a/libem/WindowsEventManager.py b/libem/WindowsEventManager.py new file mode 100644 index 0000000..1521912 --- /dev/null +++ b/libem/WindowsEventManager.py @@ -0,0 +1,641 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +import sys +import logging +import os +import multiprocessing +import json +import re +import hashlib +import datetime +import base64 + +import pyevtx +import pyevt + +import XmlHandler +import ProgressManager +import DbHandler +import elastichandler +import Config + +WINEVENT_LOGGER = logging.getLogger('WinEvent') + +WINEVENT_MAPPING_FILE = 'libem\\winevent.mapping.json' + +WINEVENT_COLUMN_ORDER = [ + 'we_hash_id', #Hash of xml event string + 'we_index', + 'we_source', #Source filename + 'we_jrec', #Json Record + 'eventfile_type', + 'computer_name', + 'event_category', + 'event_identifier', + 'event_identifier_qualifiers', + 'event_level', + 'identifier', + 'offset', + 'source_name', + 'strings', + 'user_security_identifier', + 'creation_time', + 'written_time', + 'xml_string', + 'data' +] + +WINEVENT_FIELD_MAPPING = { + 'we_hash_id':'CHAR(32)', + 
'we_index':'BIGINT UNSIGNED', + 'we_source':'TEXT', + 'we_jrec':'JSON', + 'eventfile_type':'CHAR(4)', + 'computer_name':'TEXT', + 'event_category':'BIGINT UNSIGNED', + 'event_identifier':'BIGINT UNSIGNED', + 'event_identifier_qualifiers':'BIGINT UNSIGNED', + 'event_level':'INT UNSIGNED', + 'identifier':'BIGINT UNSIGNED', + 'offset':'BIGINT UNSIGNED', + 'source_name':'TEXT', + 'strings':'TEXT', + 'user_security_identifier':'TEXT', + 'creation_time':'DATETIME', + 'written_time':'DATETIME', + 'xml_string':'TEXT', + 'data':'BLOB', +} + +def Main(): + DEBUG_FILE = sys.argv[1] + RECORD_INDEX = sys.argv[2] + RECOVERED = sys.argv[3] + + if DEBUG_FILE.lower().endswith('.evt'): + evfile = pyevt.file() + evfile.open(DEBUG_FILE) + elif DEBUG_FILE.lower().endswith('.evtx'): + evfile = evtxfile.file() + evfile.open(DEBUG_FILE) + else: + print u'File needs .evt or .evtx extention. {}'.format(DEBUG_FILE) + sys.exit(1) + + if RECOVERED: + record = evtxfile.get_record(RECORD_INDEX) + else: + record = evtxfile.get_recovered_record(RECORD_INDEX) + + try: + xml_string = record.xml_string + except: + print u'Record has no xml_string' + + list_names = [ + 'Event.EventData.Data', + 'Event.EventData.Binary', + ] + + drec = XmlHandler.GetDictionary( + xml_string, + force_list=list_names + ) + + print drec + +class WindowsEventManager(): + '''Handle process management of event processing''' + def __init__(self,options): + self.options = options + self.total_records = 0 + + self._GetEventFileList() + + self._InitOutpath() + self._InitDb() + self._InitEsIndex() + + def _InitDb(self): + self.options.db_name = os.path.join( + self.options.output_path, + self.options.evidencename+'.db' + ) + + dbConfig = DbHandler.DbConfig( + db_type = 'sqlite', + db = self.options.db_name + ) + + dbHandler = dbConfig.GetDbHandle() + + dbHandler.DropTable('winevent') + + dbHandler.CreateTableFromMapping( + 'winevent', + WINEVENT_FIELD_MAPPING, + 'PRIMARY KEY (we_hash_id)', + WINEVENT_COLUMN_ORDER + ) + + def 
_InitOutpath(self): + '''Create output path if not exists''' + try: + os.makedirs(self.options.output_path) + except WindowsError as e: + # Output already exists + pass + except Exception as e: + WINEVENT_LOGGER.error('{}'.format(str(e))) + + def _InitEsIndex(self): + '''Initialize the Elastic Index''' + if self.options.eshost is not None: + self.options.index_name = GetIndexName( + self.options.evidencename + ) + + esConfig = elastichandler.EsConfig( + self.options.eshost + ) + + esHandler = esConfig.GetEsHandler() + + result = esHandler.CheckForIndex( + self.options.index_name + ) + + if result == False: + esHandler.InitializeIndex( + index=self.options.index_name + ) + + #Check if mapping exists# + result = esHandler.CheckForMapping( + 'winevent', + index=self.options.index_name + ) + + if result == False: + index_mapping = None + with open(WINEVENT_MAPPING_FILE,'rb') as evtmap: + index_mapping = json.load(evtmap) + + esHandler.InitializeMapping( + 'winevent', + index_mapping, + index=self.options.index_name + ) + + def _GetEventFileList(self): + '''Get file listing of event files from specified source path''' + self.filelist = [] + + for dirName, subdirList, fileList in os.walk(self.options.events_path): + for filename in fileList: + if filename.lower().endswith('.evt') or filename.lower().endswith('.evtx'): + fullname = os.path.join( + dirName, + filename + ) + self.filelist.append(fullname) + self.filelist.sort() + + progressBar = ProgressManager.ProgressBarClass( + Config.Config.UI_TYPE, + count = len(self.filelist), + description = u'Enumerating Event Files'.format(dirName) + ) + + _fcnt = 0 + for filename in self.filelist: + if filename.lower().endswith('evtx'): + wefile = pyevtx.file() + wefile.open(filename) + self.total_records += wefile.get_number_of_records() + self.total_records += wefile.get_number_of_recovered_records() + wefile.close() + + elif filename.lower().endswith('evt'): + wefile = pyevt.file() + wefile.open(filename) + self.total_records 
+= wefile.get_number_of_records() + self.total_records += wefile.get_number_of_recovered_records() + wefile.close() + + progressBar.Increment(1) + _fcnt += 1 + + progressBar.Finish() + + def ProcessEvents(self): + '''Process event log files''' + print u'Total Records = {}'.format(self.total_records) + # filelist_str = u'' + # for filename in self.filelist: + # filelist_str += filename + u"\n" + # print u"Files to be processed:\n{}".format(filelist_str) + + #Progress Manager# + progressManager = ProgressManager.ProgressManager() + progressManager.start() + progressBar = progressManager.ProgressBar( + Config.Config.UI_TYPE, + count = self.total_records, + description = u'Processing Event Files' + ) + if self.options.threads_to_use > 1: + #Check to make sure enough files for all threads# + if len(self.filelist) < self.options.threads_to_use: + self.options.threads_to_use = len(self.filelist) + + #List to hold processes# + processes = [] + c = 0 + + #Iterate filenames for parsing# + for filename in self.filelist: + #Check if max threads are running# + while len(processes) >= self.options.threads_to_use: + index_list = [] #hold indexes to delete + #Wait till process frees up# + for i in range(len(processes)): + #If process has finished + result = processes[i].is_alive() + if result == False: + #Terminate process cleanly i guess... 
+ processes[i].terminate() + + index_list.append(i) + + for i in sorted(index_list, key=int, reverse=True): + del processes[i] + + #Add process# + weHandler = WindowsEventHandler( + filename, + self.options, + progressBar + ) + worker = multiprocessing.Process( + target=WindowsEventHandler.ProcessRecords, + args=(weHandler,) + ) + worker.start() + #add running process to list# + processes.append( + worker + ) + + #Wait till all process have finished# + while len(processes) > 0: + for i in range(len(processes)): + try: + if not processes[i].is_alive(): + processes[i].terminate() + del processes[i] + except: + pass + else: + for filename in self.filelist: + name = os.path.basename(filename) + eventHandler = WindowsEventHandler( + filename, + self.options, + progressBar + ) + eventHandler.ProcessRecords() + + progressBar.Finish() + +class WindowsEventHandler(): + '''Handle operations for an event file''' + def __init__(self,filename,options,progressBar): + '''Initialize the Event Handler for processing an event file''' + self.filename = filename + self.options = options + self.ext = os.path.splitext(self.filename)[1] + self.eventfile_type = None + self.progressBar = progressBar + + def _OpenFile(self): + '''Open the WindowsEventHandler's evt or evtx file handle for processing''' + if self.ext.lower().endswith('evtx'): + self.eventfile_type = 'evtx' + self.file = pyevtx.file() + elif self.ext.lower().endswith('evt'): + self.eventfile_type = 'evt' + self.file = pyevt.file() + else: + raise Exception('{} Is not a supported extention. 
def HandleRecords(filename,options,eventfile_type,record_list,recovered,dbHandler,elastic_actions,progressBar):
    '''Extract every record in record_list and queue it for storage.

    Each record is flattened into a dict, hashed for a stable primary key,
    appended to a SQLite bulk insert, and (when options.eshost is set)
    appended to elastic_actions for bulk indexing by the caller.

    args:
        filename: source event file (for logging and the we_source column)
        options: parsed command line options
        eventfile_type: 'evt' or 'evtx'
        record_list: pyevt/pyevtx record collection (normal or recovered)
        recovered: True when record_list holds recovered records
        dbHandler: open DbHandler used for the final bulk insert
        elastic_actions: list the Elastic bulk actions are appended to
        progressBar: shared progress bar proxy
    '''
    pid = os.getpid()
    sql_records = []

    # Attributes common to evt/evtx records, in the original storage order.
    attribute_names = [
        'computer_name','creation_time','data','event_category',
        'event_identifier','event_identifier_qualifiers','event_level',
        'identifier','offset','source_name','user_security_identifier',
        'written_time',
    ]

    for i in range(len(record_list)):
        progressBar.Increment(1)
        try:
            record = record_list[i]
        except Exception as error:
            WINEVENT_LOGGER.error("[PID: {}][{}] record index {}\tERROR: {}-{}\tRecovered: {}\tNot able to get record.".format(
                pid,
                filename,
                i,
                str(type(error)),
                str(error),
                str(recovered)
            ))
            continue

        # BUGFIX: log paths below used record.identifier directly; on a
        # corrupt recovered record that read can itself raise inside the
        # except handler, masking the original error. Fetch it once,
        # defensively.
        try:
            record_identifier = record.identifier
        except Exception:
            record_identifier = None

        #Get task id if exists for debugging#
        taskid = None

        #########################################################################
        ## XML Handling (evtx only - evt records carry no xml)
        #########################################################################
        xml_string = None
        jrec = None
        drec = None
        if filename.lower().endswith('.evtx'):
            try:
                xml_string = record.xml_string
                #Strip null values just incase
                xml_string = xml_string.strip(b'\0')
            except Exception as error:
                WINEVENT_LOGGER.warn("[PID: {}][{}] record index {}, event_id {}\tWARN: {}-{}\tRecovered: {}\tNot able to get xml string.".format(
                    pid,
                    filename,
                    i,
                    record_identifier,
                    str(type(error)),
                    str(error),
                    str(recovered)
                ))
                xml_string = None

            if xml_string is not None:
                list_names = [
                    'Event.EventData.Data',
                    'Event.EventData.Binary',
                ]
                drec = XmlHandler.GetDictionary(xml_string,force_list=list_names)['Event']
                jrec = json.dumps(drec)
                try:
                    taskid = drec['System']['Task']
                except Exception:
                    WINEVENT_LOGGER.debug('[PID: {}][{}] No Task ID for record at index {} (Recovered: {})'.format(pid,filename,i,str(recovered)))
        #########################################################################

        rdic = {}
        rdic['eventfile_type']=eventfile_type

        if recovered:
            # A recovered record may be partially corrupt: any attribute read
            # can raise, so salvage whatever is still readable.
            # (Replaces twelve copy-pasted try/except blocks.)
            for attribute in attribute_names:
                try:
                    rdic[attribute]=getattr(record,attribute,None)
                except Exception:
                    rdic[attribute]=None
        else:
            for attribute in attribute_names:
                rdic[attribute]=getattr(record,attribute,None)

        rdic['strings']=[]
        rdic['xml_string']=xml_string

        try:
            # NOTE: the old per-append try/except was dead code - list.append
            # cannot fail; only the iteration itself can raise.
            for rstring in record.strings:
                rdic['strings'].append(rstring)
            rdic['strings'] = unicode(rdic['strings'])
        except Exception as error:
            WINEVENT_LOGGER.info("[PID: {}][{}] record index {}, id {}\tINFO: {}-{}\tRecovered: {}\tNot able to iterate strings.".format(
                pid,
                filename,
                i,
                record_identifier,
                str(type(error)),
                str(error),
                str(recovered)
            ))
            rdic['strings'] = None

        #Create unique hash (primary key / elastic _id)#
        md5 = hashlib.md5()
        md5.update(str(rdic))
        hash_id = md5.hexdigest()

        sql_insert = {
            'we_hash_id':hash_id,
            'we_source':filename,
            'we_jrec':jrec,
            'we_recovered':recovered,
            'we_index':i
        }

        sql_insert.update(rdic)

        sql_records.append(sql_insert)

        #Add Elastic Records#
        if options.eshost is not None:
            #Add Timestamp#
            timestamp = datetime.datetime.now()

            # If event type is evt (or xml parsing failed), make drec = rdic.
            # Elastic cannot index raw binary, so expose the data field as
            # a printable string plus base64.
            if eventfile_type == 'evt' or drec is None:
                dvalue = rdic.pop("data", None)
                # BUGFIX: previously re-read record.data unguarded here,
                # which can raise for corrupt recovered records; reuse the
                # value already salvaged above.
                if dvalue is not None:
                    rdic['data_printable'] = dvalue.decode('ascii','replace')
                    rdic['data_base64'] = base64.b64encode(dvalue)
                else:
                    rdic['data_printable'] = None
                    rdic['data_base64'] = None
                drec = rdic

            drec.update({
                'index_timestamp': timestamp,
                'recovered':recovered,
                'source_filename':filename,
                'index':i
            })

            action = {
                "_index": options.index_name,
                "_type": 'winevent',
                "_id": hash_id,
                "_source": drec
            }

            elastic_actions.append(action)

    dbHandler.InsertFromListOfDicts(
        'winevent',
        sql_records,
        WINEVENT_COLUMN_ORDER
    )
We will encode it as base64 + dvalue = rdic.pop("data", None) + rdic['data_printable']=getattr(record,'data',None) + if rdic['data_printable'] is not None: + rdic['data_printable'] = rdic['data_printable'].decode('ascii','replace') + if dvalue is not None: + rdic['data_base64']=base64.b64encode(getattr(record,'data',None)) + else: + rdic['data_base64']=None + drec = rdic + + drec.update({ + 'index_timestamp': timestamp, + 'recovered':recovered, + 'source_filename':filename, + 'index':i + }) + + action = { + "_index": options.index_name, + "_type": 'winevent', + "_id": hash_id, + "_source": drec + } + + elastic_actions.append(action) + + dbHandler.InsertFromListOfDicts( + 'winevent', + sql_records, + WINEVENT_COLUMN_ORDER + ) + +def GetIndexName(index_name): + index_name = index_name.lower() + index_name = CleanIndexName(index_name) + return index_name + +def CleanIndexName(index_name): + return re.sub('[^a-zA-Z0-9-]', '-', index_name) + +if __name__ == '__main__': + Main() \ No newline at end of file diff --git a/libem/XmlHandler.py b/libem/XmlHandler.py new file mode 100644 index 0000000..d946f92 --- /dev/null +++ b/libem/XmlHandler.py @@ -0,0 +1,121 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +from bs4 import BeautifulSoup +import bs4 + +def GetDictionary(xml_string,force_list=[]): + soup = BeautifulSoup(xml_string,'xml') + + xml = Xml( + soup, + force_list=force_list + ) + + return xml + +class Xml(dict): + def __init__(self,soup,force_list): + setattr(self,'depth',0) + setattr(self,'force_list',force_list) + setattr(self,'current_field_str','') + + for tag in soup.children: + ttype = dict + tag_dict = self._ProcessTag(tag,ttype) + if tag_dict is not None: + self.update(tag_dict) + + def _ProcessTag(self,tag,ttype): + ''' + args + tag: The tag to parse into dict struct + ttype: dict or list + return + tag_dict: dictionary representing tag + ''' + if type(tag) == bs4.element.NavigableString: + return None + + # Add current item name + 
self.current_field_str = self.current_field_str + u'{}'.format(tag.name) + + if ttype == list: + current_tag = {} + else: + current_tag = {} + current_tag[tag.name] = {} + + #Get Tag Attributes# + attr_flag = False + if hasattr(tag,'attrs'): + if len(tag.attrs) > 0: + attr_flag = True + if ttype == list: + current_tag.update(tag.attrs) + else: + current_tag[tag.name].update(tag.attrs) + + # Get Tag String# + if tag.string is not None: + tstr = tag.string + if ttype == list: + current_tag.update({ + '#text':tag.string + }) + else: + current_tag[tag.name].update({ + '#text':tag.string + }) + + # Iterate children tags# + for next_tag in tag.children: + #Skip if item is String (not tag)# + if type(next_tag) == bs4.element.NavigableString: + continue + + #We are diving down a tag, so increase depth# + self.depth += 1 + + next_name = self.current_field_str + u'.' + next_tag.name + + #Check if next tag needs to be list# + if CheckTagIsList(next_name,self.force_list,next_tag): + ttype = list + + self.current_field_str = self.current_field_str + u'.' 
+ #Get Dictionary of next Tag# + tag_dict = self._ProcessTag( + next_tag, + ttype + ) + self.current_field_str = self.current_field_str[:-1] + + #Determine how to add to current tag# + if ttype == dict: + #Update if type dict# + current_tag[tag.name].update(tag_dict) + elif ttype == list: + if next_tag.name not in current_tag[tag.name]: + current_tag[tag.name][next_tag.name] = [] + current_tag[tag.name][next_tag.name].append(tag_dict) + else: + raise Exception('Bad Tag Type') + + #We are coming up a tag, so decrease depth# + self.depth -= 1 + + # Remove last item name + self.current_field_str = self.current_field_str[:-len(tag.name)] + + return current_tag + +def CheckTagIsList(current_field_str,force_list,tag): + if current_field_str in force_list: + return True + else: + test = tag.parent.find_all(tag.name) + if len(test) > 1: + return True + + return False + \ No newline at end of file diff --git a/libem/__init__.py b/libem/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/libem/elastichandler.py b/libem/elastichandler.py new file mode 100644 index 0000000..b0e3f6f --- /dev/null +++ b/libem/elastichandler.py @@ -0,0 +1,337 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +import sys +# reload(sys) +# sys.setdefaultencoding('UTF8') +import logging +import os +import json +import md5 +import datetime +import re + +from elasticsearch import Elasticsearch +from elasticsearch.helpers import bulk as es_bulk +from elasticsearch import helpers + +#logging.getLogger("elasticsearch").setLevel(logging.ERROR) +logging.getLogger("urllib3").setLevel(logging.ERROR) + +ELASTIC_LOGGER = logging.getLogger('ElasticHandler') + +class EsConfig(): + def __init__(self,host=None): + self.host = host + + def GetEsHandler(self): + esHandler = EsHandler( + self + ) + + return esHandler + +class EsHandler(): + def __init__(self,esConfig): + self.current_index = None + self.esh = Elasticsearch( + esConfig.host, + timeout=100000 + ) + + def 
CheckForIndex(self,index_name): + return self.esh.indices.exists(index_name) + + def IndexRecord(self,index,doc_type,record): + ''' + Index a single record + IN + self: EsHandler + index: the index name + doc_type: the document type to index as + record: The dictionary record to be indexed + ''' + #Create hash of our record to be the id# + m = md5.new() + m.update(json.dumps(record)) + hash_id = m.hexdigest() + + #Index the record# + res = self.esh.index( + ignore=[400], #This will ignore fields if the field doesnt match the mapping type (important for fields where timestamp is blank) + index=index, + doc_type=doc_type, + id=hash_id, + body=record + ) + + def BulkIndexRecords(self,records): + ''' + Bulk Index Records + IN + self: EsHandler + records: a list of records to bulk index + ''' + ELASTIC_LOGGER.debug('[starting] Indexing Bulk Records') + success_count,failed_items = es_bulk( + self.esh, + records, + chunk_size=10000, + raise_on_error=False + ) + + if len(failed_items) > 0: + ELASTIC_LOGGER.error('[PID {}] {} index errors'.format( + os.getpid(),len(failed_items) + )) + for failed_item in failed_items: + ELASTIC_LOGGER.error(unicode(failed_item)) + + ELASTIC_LOGGER.debug('[finished] Indexing Bulk Records') + + def CheckForMapping(self,doc_type,index=None): + ''' + Check if a mapping exists for a given document type + IN + self: EsHandler + index: the name of the index + doc_type: the document type + OUT + True - Mapping exists for doc_type in index + False - Mapping does not exists for doc_type in index + ''' + index = self._SetIndex(index) + + mapping = self.esh.indices.get_mapping( + index = index, + doc_type = doc_type + ) + + count = len(mapping.keys()) + + if count > 0: + return True + + return False + + def InitializeMapping(self,doc_type,mapping,index=None): + ''' + Create mapping for a document type + IN + self: EsHandler + index: the name of the index + doc_type: the document type + mapping: The dictionary mapping (not a json string) + ''' + index 
= self._SetIndex(index) + + self.esh.indices.put_mapping( + doc_type=doc_type, + index=index, + body=mapping['mappings'] + ) + + def InitializeIndex(self,index=None): + ''' + Create an index + IN + self: EsHandler + index: the name of the index to create + ''' + index = self._SetIndex(index) + + request_body = { + "settings" : { + "number_of_shards": 1, + "number_of_replicas": 0, + 'analysis': { + 'analyzer': { + 'file_path': { + 'type': 'custom', + 'tokenizer': 'path_hierarchy', + 'filter': ['lowercase'] + } + } + } + }, + } + + res = self.esh.indices.create( + index = index, + body = request_body + ) + + def GetRecordsFromFile_Result(self,query_file,index=None): + ''' **NEEDS WORK - NOT COMPLETE** + Return results based off of a query from a json file + IN + self: EsHandler + index: the index name + query_file: the file that contains a query + OUT + None: This returns none because this function is not complete + ''' + index = self._SetIndex(index) + + with open(query_file,'rb') as qfh: + query = json.load(qfh) + + qfh.close + + result = self.esh.search( + index=index, + scroll='60s', + size=1000, + body=query + ) + + total_hits = result['hits']['total'] + + scroll_size = total_hits + + while (scroll_size > 0): + scroll_id = result['_scroll_id'] + + result = self.esh.scroll( + scroll_id=scroll_id, + scroll='60s' + ) + records = result['hits']['hits'] + + for hit in records: + yield hit + scroll_size -= len(records) + + def FetchRecordsFromQuery(self,query,index=None): + ''' + Yield hits based off of a query from a json str + IN + self: EsHandler + query: the query (can be dictionary or json str) + index: the index name + OUT + hit: Yields hits for the query + ''' + #If query is a string, load from json# + if isinstance(query,str) or isinstance(query,unicode): + query = json.loads(query) + + index = self._SetIndex(index) + + result = self.esh.search( + index=index, + scroll='60s', + size=1000, + body=query + ) + + total_hits = result['hits']['total'] + + 
scroll_size = total_hits + + while (scroll_size > 0): + scroll_id = result['_scroll_id'] + + for hit in result['hits']['hits']: + yield hit + + scroll_size -= len(result['hits']['hits']) + + result = self.esh.scroll( + scroll_id=scroll_id, + scroll='60s' + ) + + def GetRecordsFromQueryStr_Dict(self,json_str,mapping,index=None): + ''' + Return dictionary of results based off of mapping list. The last item in the + mapping list should be unique, otherwise reocrds can overwrite records. + + This function attempts to emulate perl dbi's fetchall_hashref([key,key,...]). + + IN + self: EsHandler + json_str: query + mapping: list of mapping keys + index: The index to search. default=None (if None, will use self.current_index) + OUT + record_dict: dictionary of hits based off of mapping + ''' + query = json.loads(json_str) + record_dict = {} + + if index == None: + if self.current_index == None: + msg = 'No index given, and no current index specified. Pass in index=INDEX or use EsHandler.SetCurrentIndex(INDEX) first' + raise Exception(msg) + else: + index = self.current_index + + result = self.esh.search( + index=index, + scroll='60s', + size=1000, + body=query + ) + + scroll_size = result['hits']['total'] + + while (scroll_size > 0): + scroll_id = result['_scroll_id'] + + for hit in result['hits']['hits']: + #eumerated mapping# + emapping = [] + #for each key in mapping, enumerate the value# + for key in mapping: + emapping.append( + hit['_source'][key] + ) + + #Set current level# + current_level = record_dict + + #set markers# + c = 1 + lp = len(emapping) + + #create dictionary keys based off of enumerated mapping# + for key in emapping: + if key not in current_level: + if lp == c: + current_level[key] = hit + else: + current_level[key] = {} + current_level = current_level[key] + c += 1 + #update scroll size# + scroll_size -= len(result['hits']['hits']) + + #update result# + result = self.esh.scroll( + scroll_id=scroll_id, + scroll='60s' + ) + + return record_dict + + def 
SetCurrentIndex(self,index_name): + ''' + Set the current index to index_name. + IN + self: EsHandler + index_name: name of the index + ''' + self.current_index = index_name + + def _SetIndex(self,index): + if index == None: + if self.current_index == None: + msg = 'No index given, and no current index specified. Pass in index=INDEX or use EsHandler.SetCurrentIndex(INDEX) first' + raise Exception(msg) + else: + index = self.current_index + + return index + +if __name__ == '__main__': + pass \ No newline at end of file diff --git a/libem/winevent.mapping.json b/libem/winevent.mapping.json new file mode 100644 index 0000000..9c5908c --- /dev/null +++ b/libem/winevent.mapping.json @@ -0,0 +1,50 @@ +{ + "mappings": { + "winevent": { + "properties": { + "creation_time": { + "format": "strict_date_optional_time||epoch_millis", + "type": "date" + }, + "computer_name": { + "type": "string" + }, + "identifier": { + "type": "long" + }, + "index_timestamp": { + "format": "strict_date_optional_time||epoch_millis", + "type": "date" + }, + "offset": { + "type": "long" + }, + "written_time": { + "format": "strict_date_optional_time||epoch_millis", + "type": "date" + }, + "event_category": { + "type": "long" + }, + "event_identifier": { + "type": "long" + }, + "eventfile_type": { + "type": "string" + }, + "recovered": { + "type": "boolean" + }, + "strings": { + "type": "string" + }, + "source_filename": { + "type": "string" + }, + "source_name": { + "type": "string" + } + } + } + } +} \ No newline at end of file diff --git a/licenses/LICENSE.beautifulsoup4 b/licenses/LICENSE.beautifulsoup4 new file mode 100644 index 0000000..c323da0 --- /dev/null +++ b/licenses/LICENSE.beautifulsoup4 @@ -0,0 +1,17 @@ +Author: Leonard Richardson +Home Page: http://www.crummy.com/software/BeautifulSoup/bs4/ +Download URL: http://www.crummy.com/software/BeautifulSoup/bs4/download/ +License: MIT +Categories +Development Status :: 5 - Production/Stable +Intended Audience :: Developers +License :: OSI 
Approved :: MIT License +Programming Language :: Python +Programming Language :: Python :: 2 +Programming Language :: Python :: 3 +Topic :: Software Development :: Libraries :: Python Modules +Topic :: Text Processing :: Markup :: HTML +Topic :: Text Processing :: Markup :: SGML +Topic :: Text Processing :: Markup :: XML +Package Index Owner: leonard +DOAP record: beautifulsoup4-4.4.1.xml \ No newline at end of file diff --git a/licenses/LICENSE.libevtx b/licenses/LICENSE.libevtx new file mode 100644 index 0000000..3f7b8b1 --- /dev/null +++ b/licenses/LICENSE.libevtx @@ -0,0 +1,166 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + + This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. + + 0. Additional Definitions. + + As used herein, "this License" refers to version 3 of the GNU Lesser +General Public License, and the "GNU GPL" refers to version 3 of the GNU +General Public License. + + "The Library" refers to a covered work governed by this License, +other than an Application or a Combined Work as defined below. + + An "Application" is any work that makes use of an interface provided +by the Library, but which is not otherwise based on the Library. +Defining a subclass of a class defined by the Library is deemed a mode +of using an interface provided by the Library. + + A "Combined Work" is a work produced by combining or linking an +Application with the Library. The particular version of the Library +with which the Combined Work was made is also called the "Linked +Version". 
+ + The "Minimal Corresponding Source" for a Combined Work means the +Corresponding Source for the Combined Work, excluding any source code +for portions of the Combined Work that, considered in isolation, are +based on the Application, and not on the Linked Version. + + The "Corresponding Application Code" for a Combined Work means the +object code and/or source code for the Application, including any data +and utility programs needed for reproducing the Combined Work from the +Application, but excluding the System Libraries of the Combined Work. + + 1. Exception to Section 3 of the GNU GPL. + + You may convey a covered work under sections 3 and 4 of this License +without being bound by section 3 of the GNU GPL. + + 2. Conveying Modified Versions. + + If you modify a copy of the Library, and, in your modifications, a +facility refers to a function or data to be supplied by an Application +that uses the facility (other than as an argument passed when the +facility is invoked), then you may convey a copy of the modified +version: + + a) under this License, provided that you make a good faith effort to + ensure that, in the event an Application does not supply the + function or data, the facility still operates, and performs + whatever part of its purpose remains meaningful, or + + b) under the GNU GPL, with none of the additional permissions of + this License applicable to that copy. + + 3. Object Code Incorporating Material from Library Header Files. + + The object code form of an Application may incorporate material from +a header file that is part of the Library. 
You may convey such object +code under terms of your choice, provided that, if the incorporated +material is not limited to numerical parameters, data structure +layouts and accessors, or small macros, inline functions and templates +(ten or fewer lines in length), you do both of the following: + + a) Give prominent notice with each copy of the object code that the + Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the object code with a copy of the GNU GPL and this license + document. + + 4. Combined Works. + + You may convey a Combined Work under terms of your choice that, +taken together, effectively do not restrict modification of the +portions of the Library contained in the Combined Work and reverse +engineering for debugging such modifications, if you also do each of +the following: + + a) Give prominent notice with each copy of the Combined Work that + the Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the Combined Work with a copy of the GNU GPL and this license + document. + + c) For a Combined Work that displays copyright notices during + execution, include the copyright notice for the Library among + these notices, as well as a reference directing the user to the + copies of the GNU GPL and this license document. + + d) Do one of the following: + + 0) Convey the Minimal Corresponding Source under the terms of this + License, and the Corresponding Application Code in a form + suitable for, and under terms that permit, the user to + recombine or relink the Application with a modified version of + the Linked Version to produce a modified Combined Work, in the + manner specified by section 6 of the GNU GPL for conveying + Corresponding Source. + + 1) Use a suitable shared library mechanism for linking with the + Library. 
A suitable mechanism is one that (a) uses at run time + a copy of the Library already present on the user's computer + system, and (b) will operate properly with a modified version + of the Library that is interface-compatible with the Linked + Version. + + e) Provide Installation Information, but only if you would otherwise + be required to provide such information under section 6 of the + GNU GPL, and only to the extent that such information is + necessary to install and execute a modified version of the + Combined Work produced by recombining or relinking the + Application with a modified version of the Linked Version. (If + you use option 4d0, the Installation Information must accompany + the Minimal Corresponding Source and Corresponding Application + Code. If you use option 4d1, you must provide the Installation + Information in the manner specified by section 6 of the GNU GPL + for conveying Corresponding Source.) + + 5. Combined Libraries. + + You may place library facilities that are a work based on the +Library side by side in a single library together with other library +facilities that are not Applications and are not covered by this +License, and convey such a combined library under terms of your +choice, if you do both of the following: + + a) Accompany the combined library with a copy of the same work based + on the Library, uncombined with any other library facilities, + conveyed under the terms of this License. + + b) Give prominent notice with the combined library that part of it + is a work based on the Library, and explaining where to find the + accompanying uncombined form of the same work. + + 6. Revised Versions of the GNU Lesser General Public License. + + The Free Software Foundation may publish revised and/or new versions +of the GNU Lesser General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. 
+ + Each version is given a distinguishing version number. If the +Library as you received it specifies that a certain numbered version +of the GNU Lesser General Public License "or any later version" +applies to it, you have the option of following the terms and +conditions either of that published version or of any later version +published by the Free Software Foundation. If the Library as you +received it does not specify a version number of the GNU Lesser +General Public License, you may choose any version of the GNU Lesser +General Public License ever published by the Free Software Foundation. + + If the Library as you received it specifies that a proxy can decide +whether future versions of the GNU Lesser General Public License shall +apply, that proxy's public statement of acceptance of any version is +permanent authorization for you to choose that version for the +Library. + diff --git a/licenses/LICENSE.progressbar2 b/licenses/LICENSE.progressbar2 new file mode 100644 index 0000000..f3aaf43 --- /dev/null +++ b/licenses/LICENSE.progressbar2 @@ -0,0 +1,28 @@ +Copyright (c) 2015, Rick van Hattem (Wolph) +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of Python Progressbar nor the names of its + contributors may be used to endorse or promote products derived from this + software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT +OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +