From 4af8cb69f4753268bd102c74333918d9bad1957d Mon Sep 17 00:00:00 2001
From: Itamar Gafni
Date: Mon, 15 Jul 2024 17:29:16 +0300
Subject: [PATCH] fix(linux-offline-scan): handle lists of
 files_info/loaded_module_info longer than 2000 from offline scans

---
 intezer_sdk/__init__.py               |  2 +-
 intezer_sdk/_endpoint_analysis_api.py | 31 +++++++++++++++++++--------
 intezer_sdk/endpoint_analysis.py      | 10 ++++-----
 3 files changed, 28 insertions(+), 15 deletions(-)

diff --git a/intezer_sdk/__init__.py b/intezer_sdk/__init__.py
index 6b50f52..23280c1 100644
--- a/intezer_sdk/__init__.py
+++ b/intezer_sdk/__init__.py
@@ -1 +1 @@
-__version__ = '1.21'
+__version__ = '1.22'
diff --git a/intezer_sdk/_endpoint_analysis_api.py b/intezer_sdk/_endpoint_analysis_api.py
index 2c739fe..df39975 100644
--- a/intezer_sdk/_endpoint_analysis_api.py
+++ b/intezer_sdk/_endpoint_analysis_api.py
@@ -34,10 +34,15 @@ def send_processes_info(self, processes_info: dict):
         raise_for_status(response)
 
     def send_all_loaded_modules_info(self, all_loaded_modules_info: dict):
-        response = self.request_with_refresh_expired_access_token(path=f'/processes/loaded-modules-info',
-                                                                  data=all_loaded_modules_info,
-                                                                  method='POST')
-        raise_for_status(response)
+        processes = all_loaded_modules_info.get('processes')
+        # Analyze schema limits to 2000 items per request
+        if processes:
+            for i in range(0, len(processes), 2000):
+                data = {'processes': processes[i:i+2000]}
+                response = self.request_with_refresh_expired_access_token(path=f'/processes/loaded-modules-info',
+                                                                          data=data,
+                                                                          method='POST')
+                raise_for_status(response)
 
     def send_loaded_modules_info(self, pid, loaded_modules_info: dict):
         response = self.request_with_refresh_expired_access_token(path=f'/processes/{pid}/loaded-modules-info',
@@ -68,11 +73,19 @@ def send_files_info(self, files_info: dict) -> List[str]:
         """
         :param files_info: endpoint scan files info
         :return: list of file hashes to upload
         """
-        response = 
self.request_with_refresh_expired_access_token(path='/files-info',
-                                                                  data=files_info,
-                                                                  method='POST')
-        raise_for_status(response)
-        return response.json()['result']
+        # Analyze schema limits to 2000 items per request
+        files_to_upload = []
+        files_info = files_info.get('files_info')
+        if files_info:
+            for i in range(0, len(files_info), 2000):
+                data = {'files_info': files_info[i:i+2000]}
+                response = self.request_with_refresh_expired_access_token(path='/files-info',
+                                                                          data=data,
+                                                                          method='POST')
+                raise_for_status(response)
+                files_to_upload.extend(response.json()['result'])
+
+        return files_to_upload
 
     def send_memory_module_dump_info(self, memory_modules_info: dict) -> List[str]:
         """
diff --git a/intezer_sdk/endpoint_analysis.py b/intezer_sdk/endpoint_analysis.py
index 498e1b4..087456b 100644
--- a/intezer_sdk/endpoint_analysis.py
+++ b/intezer_sdk/endpoint_analysis.py
@@ -18,7 +18,6 @@
 from intezer_sdk.consts import EndpointAnalysisEndReason
 from intezer_sdk.consts import SCAN_DEFAULT_MAX_WORKERS
 from intezer_sdk.sub_analysis import SubAnalysis
-
 logger = logging.getLogger(__name__)
 
 
@@ -218,24 +217,25 @@ def _send_loaded_modules_info(self):
 
     def _send_files_info_and_upload_required(self):
         logger.info(f'Endpoint analysis: {self.analysis_id}, uploading files info and uploading required files')
+        tasks = []
         for files_info_file in glob.glob(os.path.join(self._offline_scan_directory, 'files_info_*.json')):
             logger.debug(f'Endpoint analysis: {self.analysis_id}, uploading {files_info_file}')
             with open(files_info_file, encoding='utf-8') as f:
                 files_info = json.load(f)
             files_to_upload = self._scan_api.send_files_info(files_info)
-            futures = []
             with concurrent.futures.ThreadPoolExecutor(self.max_workers) as executor:
                 for file_to_upload in files_to_upload:
                     file_path = os.path.join(self._files_dir, f'{file_to_upload}.sample')
                     if os.path.isfile(file_path):
-                        futures.append(executor.submit(self._scan_api.upload_collected_binary,
+                        
tasks.append(executor.submit(self._scan_api.upload_collected_binary,
                                                        file_path,
                                                        'file-system'))
                     else:
                         logger.warning(f'Endpoint analysis: {self.analysis_id}, file {file_path} does not exist')
-            for future in concurrent.futures.as_completed(futures):
-                future.result()
+
+        for future in concurrent.futures.as_completed(tasks):
+            future.result()
 
     def _send_module_differences(self):
         file_module_differences_file_path = os.path.join(self._offline_scan_directory, 'file_module_differences.json')