Commit 3c0e91d

Fix bug where we assume output_file is zip
john-cado committed Sep 24, 2024
1 parent 0e49eff commit 3c0e91d
Showing 3 changed files with 88 additions and 90 deletions.
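
The bug: yara_scan and the per-OS dump_processes implementations each reopened self.output_path directly with zipfile.ZipFile, so collection broke whenever the user had asked for .tar.lz4 output. The commit instead opens the archive once through _open_output() and passes the open handle to each step. A minimal sketch of that pattern, using hypothetical names (Collector and _collect are illustrations, not varc's API):

    import zipfile

    class Collector:
        def __init__(self, output_path: str) -> None:
            self.output_path = output_path
            # Open the archive exactly once, in the one place that knows its
            # format, then thread the handle through every step that writes.
            with self._open_output() as output:
                self._collect(output)

        def _open_output(self) -> zipfile.ZipFile:
            # The real method dispatches on the extension ('.tar.lz4' vs zip).
            return zipfile.ZipFile(self.output_path, "a", compression=zipfile.ZIP_DEFLATED)

        def _collect(self, output: zipfile.ZipFile) -> None:
            output.writestr("example.json", b"{}")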
76 changes: 39 additions & 37 deletions varc_core/systems/base_system.py
@@ -99,8 +99,6 @@ def __init__(
             raise ValueError(
                 "Only one of Process name or Process ID (PID) can be used. Please re-run using one or the other.")
 
-        self.acquire_volatile()
-
         if self.yara_file:
             if not _YARA_AVAILABLE:
                 logging.error("YARA not available. yara-python is required and is either not installed or not functioning correctly.")
@@ -113,15 +111,21 @@ def __init__(
         if self.yara_file and not self.include_memory and _YARA_AVAILABLE:
             logging.info("YARA hits will be recorded only since include_memory is not selected.")
 
-        if self.include_memory:
-            if self.yara_file:
-                self.yara_scan()
-            self.dump_processes()
-
-        if self.extract_dumps:
-            from varc_core.utils import dumpfile_extraction
-            dumpfile_extraction.extract_dumps(Path(self.output_path))
+        with self._open_output() as output:
+
+            self.acquire_volatile(output)
+
+            if self.include_memory:
+                if self.yara_file:
+                    self.yara_scan(output)
+                self.dump_processes(output)
+
+            if self.extract_dumps:
+                if not self.output_path.endswith('.zip'):
+                    logging.warning('extract_dumps only supported with zip output')
+                from varc_core.utils import dumpfile_extraction
+                dumpfile_extraction.extract_dumps(Path(self.output_path))
 
     def get_network(self) -> List[str]:
         """Get active network connections
@@ -311,7 +315,7 @@ def take_screenshot(self) -> Optional[bytes]:
             logging.error("Unable to take screenshot")
             return None
 
-    def acquire_volatile(self) -> None:
+    def acquire_volatile(self, output_file: Union[zipfile.ZipFile, _TarLz4Wrapper]) -> None:
         """Acquire volatile data into a zip file
         This is called by all OS's
         """
@@ -327,35 +331,34 @@ def acquire_volatile(self) -> None:
         else:
             screenshot_image = None
 
-        with self._open_output() as output_file:
-            if screenshot_image:
-                output_file.writestr(f"{self.get_machine_name()}-{self.timestamp}.png", screenshot_image)
-            for key, value in table_data.items():
-                output_file.writestr(f"{key}.json", value.encode())
-            if self.network_log:
-                logging.info("Adding Netstat Data")
-                output_file.writestr("netstat.log", "\r\n".join(self.network_log).encode())
-            if self.include_open and self.dumped_files:
-                for file_path in self.dumped_files:
-                    logging.info(f"Adding open file {file_path}")
-                    try:
-                        if os.path.getsize(file_path) > _MAX_OPEN_FILE_SIZE:
-                            logging.warning(f"Skipping file as too large {file_path}")
-                        else:
-                            try:
-                                output_file.write(file_path, strip_drive(f"./collected_files/{file_path}"))
-                            except PermissionError:
-                                logging.warn(f"Permission denied copying {file_path}")
-                    except FileNotFoundError:
-                        logging.warning(f"Could not open {file_path} for reading")
+        if screenshot_image:
+            output_file.writestr(f"{self.get_machine_name()}-{self.timestamp}.png", screenshot_image)
+        for key, value in table_data.items():
+            output_file.writestr(f"{key}.json", value.encode())
+        if self.network_log:
+            logging.info("Adding Netstat Data")
+            output_file.writestr("netstat.log", "\r\n".join(self.network_log).encode())
+        if self.include_open and self.dumped_files:
+            for file_path in self.dumped_files:
+                logging.info(f"Adding open file {file_path}")
+                try:
+                    if os.path.getsize(file_path) > _MAX_OPEN_FILE_SIZE:
+                        logging.warning(f"Skipping file as too large {file_path}")
+                    else:
+                        try:
+                            output_file.write(file_path, strip_drive(f"./collected_files/{file_path}"))
+                        except PermissionError:
+                            logging.warn(f"Permission denied copying {file_path}")
+                except FileNotFoundError:
+                    logging.warning(f"Could not open {file_path} for reading")
 
     def _open_output(self) -> Union[zipfile.ZipFile, _TarLz4Wrapper]:
         if self.output_path.endswith('.tar.lz4'):
             return _TarLz4Wrapper(self.output_path)
         else:
             return zipfile.ZipFile(self.output_path, 'a', compression=zipfile.ZIP_DEFLATED)
 
-    def yara_scan(self) -> None:
+    def yara_scan(self, output_file: Union[zipfile.ZipFile, _TarLz4Wrapper]) -> None:
         def yara_hit_callback(hit: dict) -> Any:
             self.yara_results.append(hit)
             if self.include_memory:
@@ -367,7 +370,6 @@ def yara_hit_callback(hit: dict) -> Any:
         if not _YARA_AVAILABLE:
             return None
 
-        archive_out = self.output_path
         for proc in tqdm(self.process_info, desc="YARA scan progess", unit=" procs"):
             pid = proc["Process ID"]
             p_name = proc["Name"]
@@ -385,11 +387,11 @@ def yara_hit_callback(hit: dict) -> Any:
             combined_yara_results = []
             for yara_hit in self.yara_results:
                 combined_yara_results.append(self.yara_hit_readable(yara_hit))
-            with zipfile.ZipFile(archive_out, 'a', compression=zipfile.ZIP_DEFLATED) as zip_file:
-                zip_file.writestr("yara_results.json", self.dict_to_json(combined_yara_results))
-                logging.info("YARA scan results written to yara_results.json in output archive.")
+            output_file.writestr("yara_results.json", self.dict_to_json(combined_yara_results))
+            logging.info("YARA scan results written to yara_results.json in output archive.")
         else:
             logging.info("No YARA rules were triggered. Nothing will be written to the output archive.")
 
-    def dump_processes(self) -> None:
+    def dump_processes(self, output_file: Union[zipfile.ZipFile, _TarLz4Wrapper]) -> None:
         raise NotImplementedError()
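
The new signatures accept Union[zipfile.ZipFile, _TarLz4Wrapper], so _TarLz4Wrapper must mirror the two ZipFile methods the code calls, write and writestr, plus the context-manager protocol used in __init__. Its implementation is not part of this commit; a rough sketch of what such a wrapper might look like, assuming the lz4 package (illustration only, the real class in base_system.py may differ):

    import io
    import tarfile

    import lz4.frame  # assumed dependency; not shown in this diff

    class TarLz4Wrapper:
        """Sketch: ZipFile-compatible write/writestr over an LZ4-compressed tar."""

        def __init__(self, path: str) -> None:
            self._lz4 = lz4.frame.open(path, "wb")
            self._tar = tarfile.open(fileobj=self._lz4, mode="w")

        def write(self, filename: str, arcname: str) -> None:
            # Counterpart of ZipFile.write: add a file from disk under arcname.
            self._tar.add(filename, arcname=arcname)

        def writestr(self, arcname: str, data: bytes) -> None:
            # Counterpart of ZipFile.writestr: add an in-memory payload.
            info = tarfile.TarInfo(name=arcname)
            info.size = len(data)
            self._tar.addfile(info, io.BytesIO(data))

        def __enter__(self) -> "TarLz4Wrapper":
            return self

        def __exit__(self, *exc) -> None:
            self._tar.close()
            self._lz4.close()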
80 changes: 39 additions & 41 deletions varc_core/systems/linux.py
@@ -5,10 +5,10 @@
 from os import getpid, sep
 from pathlib import Path
 from tempfile import NamedTemporaryFile
-from typing import Any, List, Optional, Tuple
+from typing import Any, List, Optional, Tuple, Union
 
 from tqdm import tqdm
-from varc_core.systems.base_system import BaseSystem
+from varc_core.systems.base_system import _TarLz4Wrapper, BaseSystem
 
 # based on https://stackoverflow.com/questions/48897687/why-does-the-syscall-process-vm-readv-sets-errno-to-success and PymemLinux library
 
@@ -83,51 +83,49 @@ def read_bytes(self, pid: int, address: int, byte: int) -> Optional[bytes]:
 
         return buff.raw
 
-    def dump_processes(self) -> None:
+    def dump_processes(self, output_file: Union[zipfile.ZipFile, _TarLz4Wrapper]) -> None:
         """Dumps all processes to temp files, adds temp file to output archive then removes the temp file"""
-        archive_out = self.output_path
         own_pid = getpid()
-        with zipfile.ZipFile(archive_out, "a", compression=zipfile.ZIP_DEFLATED) as zip_file:
-            try:
-                for proc in tqdm(self.process_info, desc="Process dump progess", unit=" procs"):
-                    # If scanning with YARA, only dump processes if they triggered a rule
-                    if self.yara_hit_pids:
-                        if proc["Process ID"] not in self.yara_hit_pids or proc["Process ID"] == own_pid:
-                            continue
-                    pid = proc["Process ID"]
-                    p_name = proc["Name"]
-                    maps = self.parse_mem_map(pid, p_name)
-                    if not maps:
-                        continue
-                    with NamedTemporaryFile(mode="w+b", buffering=0, delete=True) as tmpfile:
-                        try:
-                            for map in maps:
-                                page_start = map[0]
-                                page_len = map[1] - map[0]
-                                if page_len > _MAX_VIRTUAL_PAGE_CHUNK:
-                                    sub_chunk_count, final_chunk_size = divmod(page_len, _MAX_VIRTUAL_PAGE_CHUNK)
-                                    page_len = int(page_len / sub_chunk_count)
-                                    for sc in range(0, sub_chunk_count):
-                                        mem_page_content = self.read_bytes(pid, page_start, page_len)
-                                        if mem_page_content:
-                                            tmpfile.write(mem_page_content)
-                                        page_start = page_start + _MAX_VIRTUAL_PAGE_CHUNK
-                                    mem_page_content = self.read_bytes(pid, page_start, final_chunk_size)
-                                    if mem_page_content:
-                                        tmpfile.write(mem_page_content)
-                                else:
-                                    mem_page_content = self.read_bytes(pid, page_start, page_len)
-                                    if mem_page_content:
-                                        tmpfile.write(mem_page_content)
-                            zip_file.write(tmpfile.name, f"process_dumps{sep}{p_name}_{pid}.mem")
-                        except PermissionError:
-                            logging.warning(f"Permission denied opening process memory for {p_name} (pid {pid}). Cannot dump this process.")
-                            continue
-                        except OSError as oserror:
-                            logging.warning(f"Error opening process memory page for {p_name} (pid {pid}). Error was {oserror}. Dump may be incomplete.")
-                            pass
-            except MemoryError:
-                logging.warning("Exceeded available memory, skipping further memory collection")
+        try:
+            for proc in tqdm(self.process_info, desc="Process dump progess", unit=" procs"):
+                # If scanning with YARA, only dump processes if they triggered a rule
+                if self.yara_hit_pids:
+                    if proc["Process ID"] not in self.yara_hit_pids or proc["Process ID"] == own_pid:
+                        continue
+                pid = proc["Process ID"]
+                p_name = proc["Name"]
+                maps = self.parse_mem_map(pid, p_name)
+                if not maps:
+                    continue
+                with NamedTemporaryFile(mode="w+b", buffering=0, delete=True) as tmpfile:
+                    try:
+                        for map in maps:
+                            page_start = map[0]
+                            page_len = map[1] - map[0]
+                            if page_len > _MAX_VIRTUAL_PAGE_CHUNK:
+                                sub_chunk_count, final_chunk_size = divmod(page_len, _MAX_VIRTUAL_PAGE_CHUNK)
+                                page_len = int(page_len / sub_chunk_count)
+                                for sc in range(0, sub_chunk_count):
+                                    mem_page_content = self.read_bytes(pid, page_start, page_len)
+                                    if mem_page_content:
+                                        tmpfile.write(mem_page_content)
+                                    page_start = page_start + _MAX_VIRTUAL_PAGE_CHUNK
+                                mem_page_content = self.read_bytes(pid, page_start, final_chunk_size)
+                                if mem_page_content:
+                                    tmpfile.write(mem_page_content)
+                            else:
+                                mem_page_content = self.read_bytes(pid, page_start, page_len)
+                                if mem_page_content:
+                                    tmpfile.write(mem_page_content)
+                        output_file.write(tmpfile.name, f"process_dumps{sep}{p_name}_{pid}.mem")
+                    except PermissionError:
+                        logging.warning(f"Permission denied opening process memory for {p_name} (pid {pid}). Cannot dump this process.")
+                        continue
+                    except OSError as oserror:
+                        logging.warning(f"Error opening process memory page for {p_name} (pid {pid}). Error was {oserror}. Dump may be incomplete.")
+                        pass
+        except MemoryError:
+            logging.warning("Exceeded available memory, skipping further memory collection")
 
 
         logging.info(f"Dumping processing has completed. Output file is located: {archive_out}")
22 changes: 10 additions & 12 deletions varc_core/systems/windows.py
@@ -5,10 +5,10 @@
 from os import sep
 from pathlib import Path
 from sys import platform
-from typing import Any, Optional, Tuple
+from typing import Any, Optional, Tuple, Union
 
 from tqdm import tqdm
-from varc_core.systems.base_system import BaseSystem
+from varc_core.systems.base_system import _TarLz4Wrapper, BaseSystem
 
 if platform == "win32": # dont try to import on linux
     from sys import maxsize
@@ -45,11 +45,10 @@ def read_process(self, handle: int, address: int) -> Tuple[Optional[bytes], int]:
             logging.warning("Failed to read a memory page")
         return page_bytes, next_region
 
-    def dump_processes(self) -> None:
+    def dump_processes(self, output_file: Union[zipfile.ZipFile, _TarLz4Wrapper]) -> None:
         """
         Based on pymem's 'Pattern' module
         """
-        archive_out = self.output_path
         for proc in tqdm(self.process_info, desc="Process dump progess", unit=" procs"):
             # If scanning with YARA, only dump processes if they triggered a rule
             if self.yara_hit_pids:
@@ -74,12 +73,11 @@ def dump_processes(self) -> None:
 
             # Dump all pages the process virtual address space
             next_region = 0
-            with zipfile.ZipFile(archive_out, 'a', compression=zipfile.ZIP_DEFLATED) as zip_file:
-                with tempfile.NamedTemporaryFile(mode="w+b", buffering=0, delete=False) as tmpfile:
-                    while next_region < user_space_limit:
-                        proc_page_bytes, next_region = self.read_process(p.process_handle, next_region)
-                        if proc_page_bytes:
-                            tmpfile.write(proc_page_bytes)
-                    zip_file.write(tmpfile.name, f"process_dumps{sep}{p_name}_{pid}.mem")
-                del_file(tmpfile.name)
+            with tempfile.NamedTemporaryFile(mode="w+b", buffering=0, delete=False) as tmpfile:
+                while next_region < user_space_limit:
+                    proc_page_bytes, next_region = self.read_process(p.process_handle, next_region)
+                    if proc_page_bytes:
+                        tmpfile.write(proc_page_bytes)
+                output_file.write(tmpfile.name, f"process_dumps{sep}{p_name}_{pid}.mem")
+            del_file(tmpfile.name)
         logging.info(f"Dumping processing has completed. Output file is located: {archive_out}")
