Skip to content

Commit

Permalink
test: behavior with different suffix combinations
Browse files Browse the repository at this point in the history
The test files were created with this script:

    # cd tests/files/suffixes

    # clean
    rm -rf chunks_carve/ extractions/ collisions.zip

    # reproduce output
    seq 100 | gzip > 0-160.gzip
    seq 128 | gzip > 160-375.gzip
    dd if=/dev/zero of=375-512.padding bs=1 count=137
    cat 0-160.gzip 160-375.gzip 375-512.padding > chunks

    unblob --carve-suffix _carve chunks
    cp 0-160.gzip chunks_carve/
    echo something else > chunks_carve/0-160.gzip_extract/gzip.uncompressed

    zip collisions.zip chunks chunks_carve/0-160.gzip chunks_carve/0-160.gzip_extract/gzip.uncompressed

    for input in collisions.zip chunks
    do
      unblob                                   -e extractions/defaults/ $input
      unblob --carve-suffix _carve       -e extractions/_carve_extract/ $input
      unblob --carve-suffix _c --extract-suffix _e -e extractions/_c_e/ $input
    done
  • Loading branch information
e3krisztian committed Nov 28, 2024
1 parent 2767e86 commit 711e3d2
Show file tree
Hide file tree
Showing 29 changed files with 191 additions and 0 deletions.
1 change: 1 addition & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
tests/integration/** filter=lfs diff=lfs merge=lfs -text
tests/files/** filter=lfs diff=lfs merge=lfs -text
3 changes: 3 additions & 0 deletions tests/files/suffixes/chunks
Git LFS file not shown
3 changes: 3 additions & 0 deletions tests/files/suffixes/collisions.zip
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
3 changes: 3 additions & 0 deletions tests/files/suffixes/extractions/_c_e/collisions.zip_e/chunks
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
109 changes: 109 additions & 0 deletions tests/test_processing_suffixes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
from pathlib import Path
from typing import Optional

import pytest

from unblob.processing import ExtractionConfig, process_file
from unblob.report import OutputDirectoryExistsReport
from unblob.testing import check_output_is_the_same

TEST_DATA_PATH = Path(__file__).parent / "files/suffixes"


def _patch(extraction_config: ExtractionConfig, carve_suffix, extract_suffix):
    """Adjust *extraction_config* in place for a suffix test case.

    ``keep_extracted_chunks`` is always switched off.  ``carve_suffix`` and
    ``extract_suffix`` are written onto the configuration only when they are
    not ``None``; a ``None`` leaves whatever value the configuration already
    carries for that suffix.
    """
    extraction_config.keep_extracted_chunks = False

    overrides = (
        ("carve_suffix", carve_suffix),
        ("extract_suffix", extract_suffix),
    )
    for attribute, value in overrides:
        if value is None:
            # None means "do not override this suffix".
            continue
        setattr(extraction_config, attribute, value)


@pytest.mark.parametrize(
    "carve_suffix,extract_suffix,expected_output_dir_name",
    [
        (None, None, "defaults"),
        ("_c", "_e", "_c_e"),
        ("_carve", "_extract", "_carve_extract"),
    ],
)
def test_top_level_carve(
    carve_suffix: Optional[str],
    extract_suffix: Optional[str],
    expected_output_dir_name: str,
    extraction_config: ExtractionConfig,
):
    """Process the ``chunks`` input and compare its carve directory.

    For each suffix combination the test expects processing to finish
    without errors, to leave a ``<input name><carve suffix>`` directory under
    the extraction root whose content matches the stored expectation, and to
    leave no ``<input name><extract suffix>`` directory (unless both names
    happen to be identical).
    """
    _patch(extraction_config, carve_suffix, extract_suffix)

    blob = TEST_DATA_PATH / "chunks"
    carved_name = blob.name + extraction_config.carve_suffix
    extracted_name = blob.name + extraction_config.extract_suffix
    golden_dir = TEST_DATA_PATH / "extractions" / expected_output_dir_name / carved_name

    reports = process_file(extraction_config, blob)

    assert reports.errors == []

    output_root = extraction_config.extract_root
    if carved_name != extracted_name:
        # Only the carve directory may exist for this input.
        assert not (output_root / extracted_name).exists()
    check_output_is_the_same(golden_dir, output_root / carved_name)


# Expected output-directory collisions, keyed by the parametrized
# (carve_suffix, extract_suffix) pair used by
# test_top_level_extract_and_collisions.  Each value is the set of POSIX-style
# paths, relative to the extraction root, for which processing
# "collisions.zip" should yield an OutputDirectoryExistsReport.
# Suffix combinations that are not listed here are expected to produce no
# collisions: the test looks the key up with an empty set as the fallback.
EXPECTED_COLLISION_PATHS: "dict[tuple[Optional[str], Optional[str]], set]" = {
    # (None, None): _patch leaves both suffixes at the configuration's values.
    (None, None): {
        "collisions.zip_extract/chunks_carve/0-160.gzip_extract",
    },
    # Explicit "_carve"/"_extract" suffixes collide at two levels.
    ("_carve", "_extract"): {
        "collisions.zip_extract/chunks_carve",
        "collisions.zip_extract/chunks_carve/0-160.gzip_extract",
    },
}


@pytest.mark.parametrize(
    "carve_suffix,extract_suffix,expected_output_dir_name",
    [
        (None, None, "defaults"),
        ("_c", "_e", "_c_e"),
        ("_carve", "_extract", "_carve_extract"),
    ],
)
def test_top_level_extract_and_collisions(
    carve_suffix: Optional[str],
    extract_suffix: Optional[str],
    expected_output_dir_name: str,
    extraction_config: ExtractionConfig,
):
    """Process ``collisions.zip`` and verify collisions and extracted output.

    The only errors accepted are ``OutputDirectoryExistsReport``-s, and their
    paths (relative to the extraction root) must equal the entry of
    ``EXPECTED_COLLISION_PATHS`` for the suffix pair, or the empty set when
    the pair has no entry.  The ``<input name><extract suffix>`` directory
    must match the stored expectation, and no ``<input name><carve suffix>``
    directory may exist unless both names are identical.
    """
    _patch(extraction_config, carve_suffix, extract_suffix)

    archive = TEST_DATA_PATH / "collisions.zip"
    carved_name = archive.name + extraction_config.carve_suffix
    extracted_name = archive.name + extraction_config.extract_suffix
    golden_dir = (
        TEST_DATA_PATH / "extractions" / expected_output_dir_name / extracted_name
    )

    reports = process_file(extraction_config, archive)
    output_root = extraction_config.extract_root

    # The input was prepared to collide with output directories during both
    # the carving and the extracting phases; collect where that was reported.
    collision_reports = [
        error
        for error in reports.errors
        if isinstance(error, OutputDirectoryExistsReport)
    ]
    problem_paths = {
        report.path.relative_to(output_root).as_posix()
        for report in collision_reports
    }
    expected_paths = EXPECTED_COLLISION_PATHS.get(
        (carve_suffix, extract_suffix), set()
    )
    assert problem_paths == expected_paths
    # Nothing but OutputDirectoryExistsReport-s may be among the errors.
    assert len(reports.errors) == len(problem_paths)

    if carved_name != extracted_name:
        # Only the extract directory may exist for this input.
        assert not (output_root / carved_name).exists()
    check_output_is_the_same(golden_dir, output_root / extracted_name)

0 comments on commit 711e3d2

Please sign in to comment.