Skip to content

Commit

Permalink
Fix for downloading grr timeline flow results (#630)
Browse files Browse the repository at this point in the history
* Adding support for GRR timeline flow results. If the given GRR flow ID is a TimelineFlow, the _DownloadFiles function will now collect the resulting timeline body file instead of the ArtifactArchive. This change allows the grr_flow_ts recipe to also process timeline flow results.

* Adding missing import of shutil for using copy2 function.

* Updated the `grr_host.py` unit tests to reflect the changes made to the `_DownloadFiles` function.

* Fixing pylint error for the `_mock_Get` argument in the `testNotDownloadFilesForExistingFlow` function.

* Shortening some lines to make the pylinter happy.

* Shortened another long line to make the pylinter happy.
  • Loading branch information
jkppr authored Jul 12, 2022
1 parent 02bed44 commit 4ae7973
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 24 deletions.
43 changes: 30 additions & 13 deletions dftimewolf/lib/collectors/grr_hosts.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import datetime
import os
import re
import shutil
import time
import zipfile
from typing import List, Optional, Tuple, Type
Expand Down Expand Up @@ -315,38 +316,54 @@ def _CheckSkippedFlows(self) -> None:

# TODO: change object to more specific GRR type information.
def _DownloadFiles(self, client: Client, flow_id: str) -> Optional[str]:
"""Download files from the specified flow.
"""Download files/results from the specified flow.
Args:
client (object): GRR Client object to which to download flow data from.
flow_id (str): GRR identifier of the flow.
Returns:
str: path of the zipfile containing downloaded files.
str: path containing the downloaded files.
"""
output_file_path = os.path.join(
self.output_path, '.'.join((flow_id, 'zip')))
flow = client.Flow(flow_id)
is_timeline_flow = False
if flow.Get().data.name == 'TimelineFlow':
is_timeline_flow = True
output_file_path = os.path.join(
self.output_path, '.'.join((flow_id, 'body')))
else:
output_file_path = os.path.join(
self.output_path, '.'.join((flow_id, 'zip')))

if os.path.exists(output_file_path):
self.logger.info(
'{0:s} already exists: Skipping'.format(output_file_path))
return None

flow = client.Flow(flow_id)
file_archive = flow.GetFilesArchive()
if is_timeline_flow:
file_archive = flow.GetCollectedTimelineBody()
else:
file_archive = flow.GetFilesArchive()

file_archive.WriteToFile(output_file_path)

# Unzip archive for processing and remove redundant zip
fqdn = client.data.os_info.fqdn.lower()
client_output_file = os.path.join(self.output_path, fqdn, flow_id)
if not os.path.isdir(client_output_file):
os.makedirs(client_output_file)

with zipfile.ZipFile(output_file_path) as archive:
archive.extractall(path=client_output_file)
client_output_folder = os.path.join(self.output_path, fqdn, flow_id)
if not os.path.isdir(client_output_folder):
os.makedirs(client_output_folder)

if is_timeline_flow:
shutil.copy2(
output_file_path,
os.path.join(client_output_folder,
'{}_timeline.body'.format(flow_id)))
else:
with zipfile.ZipFile(output_file_path) as archive:
archive.extractall(path=client_output_folder)
os.remove(output_file_path)

return client_output_file
return client_output_folder

def GetThreadPoolSize(self) -> int:
"""Thread pool size."""
Expand Down
59 changes: 48 additions & 11 deletions tests/lib/collectors/grr_hosts.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,38 +144,75 @@ def testAwaitFlowGRRError(self, mock_FlowGet):
@mock.patch('os.makedirs')
@mock.patch('zipfile.ZipFile')
@mock.patch('grr_api_client.flow.FlowBase.GetFilesArchive')
def testDownloadFilesForFlow(self, mock_GetFilesArchive, mock_ZipFile,
mock_makedirs, mock_isdir, mock_remove):
"""Tests that files are downloaded and unzipped in the correct
directories."""
@mock.patch('grr_api_client.flow.FlowBase.Get')
def testDownloadArtifactFilesForFlow(self, mock_Get, mock_GetFilesArchive,
mock_ZipFile, mock_makedirs, mock_isdir,
mock_remove):
"""Test if results are downloaded & unzipped in the correct directories."""
# Change output_path to something constant so we can easily assert
# if calls were done correctly.
self.grr_flow_module.output_path = '/tmp/random'
mock_isdir.return_value = False # Return false so makedirs is called
mock_Get.return_value.data.name = 'ArtifactFlow'

return_value = self.grr_flow_module._DownloadFiles(
mock_grr_hosts.MOCK_CLIENT, "F:12345")
mock_grr_hosts.MOCK_CLIENT, 'F:12345')
self.assertEqual(return_value, '/tmp/random/tomchop/F:12345')
mock_GetFilesArchive.assert_called_once()
mock_ZipFile.assert_called_once_with('/tmp/random/F:12345.zip')
mock_isdir.assert_called_once_with('/tmp/random/tomchop/F:12345')
mock_makedirs.assert_called_once_with('/tmp/random/tomchop/F:12345')
mock_remove.assert_called_once_with('/tmp/random/F:12345.zip')

@mock.patch('os.remove')
@mock.patch('os.path.isdir')
@mock.patch('os.makedirs')
@mock.patch('zipfile.ZipFile')
@mock.patch('shutil.copy2')
@mock.patch('grr_api_client.flow.FlowBase.GetFilesArchive')
@mock.patch('grr_api_client.flow.FlowBase.GetCollectedTimelineBody')
@mock.patch('grr_api_client.flow.FlowBase.Get')
def testDownloadTimelineBodyForFlow(self, mock_Get,
mock_GetCollectedTimelineBody,
mock_GetFilesArchive, mock_copy2,
mock_ZipFile, mock_makedirs, mock_isdir,
mock_remove):
"""Tests if timeline results are downloaded in the correct directories."""
# Change output_path to something constant so we can easily assert
# if calls were done correctly.
self.grr_flow_module.output_path = '/tmp/random'
mock_isdir.return_value = False # Return false so makedirs is called
mock_Get.return_value.data.name = 'TimelineFlow'

return_value = self.grr_flow_module._DownloadFiles(
mock_grr_hosts.MOCK_CLIENT, 'F:12345')
self.assertEqual(return_value, '/tmp/random/tomchop/F:12345')
mock_GetCollectedTimelineBody.assert_called_once()
mock_GetFilesArchive.assert_not_called()
mock_ZipFile.assert_not_called()
mock_copy2.assert_called_once_with(
'/tmp/random/F:12345.body',
'/tmp/random/tomchop/F:12345/F:12345_timeline.body')
mock_isdir.assert_called_once_with('/tmp/random/tomchop/F:12345')
mock_makedirs.assert_called_once_with('/tmp/random/tomchop/F:12345')
mock_remove.assert_called_once_with('/tmp/random/F:12345.body')

@mock.patch('os.path.exists')
@mock.patch('grr_api_client.flow.FlowBase.GetFilesArchive')
def testNotDownloadFilesForExistingFlow(self, mock_GetFilesArchive,
mock_exists):
"""Tests that files are downloaded and unzipped in the correct
directories."""
@mock.patch('grr_api_client.flow.FlowBase.GetCollectedTimelineBody')
@mock.patch('grr_api_client.flow.FlowBase.Get')
def testNotDownloadFilesForExistingFlow(self, _mock_Get,
mock_GetCollectedTimelineBody,
mock_GetFilesArchive, mock_exists):
"""Tests if results are not downloaded if the directory already exists."""
# Change output_path to something constant so we can easily assert
# if calls were done correctly.
self.grr_flow_module.output_path = '/tmp/random'
mock_exists.return_value = True # Simulate existing flow directory

self.grr_flow_module._DownloadFiles(mock_grr_hosts.MOCK_CLIENT, "F:12345")
self.grr_flow_module._DownloadFiles(mock_grr_hosts.MOCK_CLIENT, 'F:12345')
mock_GetFilesArchive.assert_not_called()

mock_GetCollectedTimelineBody.assert_not_called()

class GRRArtifactCollectorTest(unittest.TestCase):
"""Tests for the GRR artifact collector."""
Expand Down

0 comments on commit 4ae7973

Please sign in to comment.