Skip to content

Commit

Permalink
test(chunk-upload): Add a test for uploading multiple debug files (#2274)
Browse files Browse the repository at this point in the history

This test ensures that the correct chunks are sent to the server when
multiple debug files are being uploaded.

Note that our chunk uploading code does not guarantee that the chunks
appear in any particular order within the request. Only the invariant
that all chunks get uploaded (in any arbitrary order) is guaranteed.
Because of this, we need to parse the request body into an unordered set
of all chunks, and compare this against the set of chunks we expect to
receive.

ref #2194
  • Loading branch information
szokeasaurusrex authored Nov 27, 2024
1 parent 04cdedf commit e6e8bf3
Show file tree
Hide file tree
Showing 9 changed files with 207 additions and 2 deletions.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
This directory contains a set of three macOS Rust executables, compiled as a debug build, and thus containing debug information.
We use these executables to test the chunk upload functionality of the Sentry CLI.
Binary file not shown.
Binary file not shown.
Binary file not shown.
97 changes: 96 additions & 1 deletion tests/integration/debug_files/upload.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use std::{fs, str};

use regex::bytes::Regex;

use crate::integration::{AssertCommand, MockEndpointBuilder, TestManager};
use crate::integration::{chunk_upload, AssertCommand, MockEndpointBuilder, TestManager};

/// This regex is used to extract the boundary from the content-type header.
/// We need to match the boundary, since it changes with each request.
Expand Down Expand Up @@ -285,3 +285,98 @@ fn ensure_correct_chunk_upload() {
.with_default_token()
.run_and_assert(AssertCommand::Success);
}

#[test]
/// This test verifies a correct chunk upload of multiple debug files.
///
/// The uploaded multipart body is split into an unordered set of chunks and
/// compared against the set parsed from a recorded expected request, so the
/// order in which the chunks appear within the request does not matter.
fn chunk_upload_multiple_files() {
// Recorded multipart body that the uploaded chunks are compared against.
let expected_chunk_body = fs::read(
"tests/integration/_expected_requests/debug_files/upload/chunk_upload_multiple_files.bin",
)
.expect("expected chunk body file should be present");
// This is the boundary used in the expected request file.
// It was randomly generated when the expected request was recorded.
let boundary_of_expected_request = "------------------------b26LKrHFvpOPfwMoDhYNY8";

// Lets the assemble mock below answer its first call differently from later calls.
let is_first_assemble_call = AtomicBool::new(true);
TestManager::new()
// Serves a canned response file for GET requests to the chunk-upload endpoint.
.mock_endpoint(
MockEndpointBuilder::new("GET", "/api/0/organizations/wat-org/chunk-upload/")
.with_response_file("debug_files/get-chunk-upload.json"),
)
// Receives the actual chunk upload and checks its contents.
.mock_endpoint(
MockEndpointBuilder::new("POST", "/api/0/organizations/wat-org/chunk-upload/")
.with_response_fn(move |request| {
// The boundary of the live request is read from its content-type header;
// it is not the same value as `boundary_of_expected_request`.
let boundary = chunk_upload::boundary_from_request(request)
.expect("content-type header should be a valid multipart/form-data header");

let body = request.body().expect("body should be readable");

let chunks = chunk_upload::split_chunk_body(body, boundary)
.expect("body should be a valid multipart/form-data body");

// The recorded body is split with its own boundary, so that both sides
// are reduced to boundary-free sets which can be compared directly.
let expected_chunks = chunk_upload::split_chunk_body(
&expected_chunk_body,
boundary_of_expected_request,
)
.expect("expected chunk body is a valid multipart/form-data body");

// Using assert! because in case of failure, the output with assert_eq!
// is too long to be useful.
assert!(
chunks == expected_chunks,
"Uploaded chunks differ from the expected chunks"
);

// Respond with an empty body.
vec![]
}),
)
// Answers the assemble requests for the three debug files.
.mock_endpoint(
MockEndpointBuilder::new(
"POST",
"/api/0/projects/wat-org/wat-project/files/difs/assemble/",
)
.with_header_matcher("content-type", "application/json")
.with_response_fn(move |_| {
// `swap` returns the previous value, so only the very first call sees `true`.
if is_first_assemble_call.swap(false, Ordering::Relaxed) {
// First call: each of the three entries is reported as "not_found",
// with one missing chunk whose ID equals the entry's key.
r#"{
"6e217f035ed538d4d6c14129baad5cb52e680e74": {
"state": "not_found",
"missingChunks": ["6e217f035ed538d4d6c14129baad5cb52e680e74"]
},
"500848b7815119669a292f2ae1f44af11d7aa2d3": {
"state": "not_found",
"missingChunks": ["500848b7815119669a292f2ae1f44af11d7aa2d3"]
},
"fc27d95861d56fe16a2b66150e31652b76e8c678": {
"state": "not_found",
"missingChunks": ["fc27d95861d56fe16a2b66150e31652b76e8c678"]
}
}"#
} else {
// Any later call: every entry is reported as "created" with no missing chunks.
r#"{
"6e217f035ed538d4d6c14129baad5cb52e680e74": {
"state": "created",
"missingChunks": []
},
"500848b7815119669a292f2ae1f44af11d7aa2d3": {
"state": "created",
"missingChunks": []
},
"fc27d95861d56fe16a2b66150e31652b76e8c678": {
"state": "created",
"missingChunks": []
}
}"#
}
.into()
})
// NOTE(review): presumably sets the expected number of calls to this endpoint
// (one before and one after the upload) — confirm against `MockEndpointBuilder::expect`.
.expect(2),
)
.assert_cmd(vec![
"debug-files",
"upload",
"tests/integration/_fixtures/debug_files/upload/chunk_upload_multiple_files",
])
.with_default_token()
.run_and_assert(AssertCommand::Success);
}
2 changes: 1 addition & 1 deletion tests/integration/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ use std::io;
use std::path::Path;

use test_utils::MockEndpointBuilder;
use test_utils::{env, AssertCommand, ChunkOptions, ServerBehavior, TestManager};
use test_utils::{chunk_upload, env, AssertCommand, ChunkOptions, ServerBehavior, TestManager};

pub const UTC_DATE_FORMAT: &str = r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}.\d{6,9}Z";
const VERSION: &str = env!("CARGO_PKG_VERSION");
Expand Down
107 changes: 107 additions & 0 deletions tests/integration/test_utils/chunk_upload.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
//! Utilities for chunk upload tests.
use std::collections::HashSet;
use std::error::Error;
use std::str;
use std::sync::LazyLock;

use mockito::Request;
use regex::bytes::Regex;

/// Matches a complete `multipart/form-data` content-type header value and
/// captures its boundary parameter in the named group `boundary`.
/// The boundary has to be captured rather than hard-coded, since it changes
/// with each request.
/// The set of characters accepted in the boundary follows the format specified in
/// https://www.w3.org/Protocols/rfc1341/7_2_Multipart.html.
static CONTENT_TYPE_REGEX: LazyLock<Regex> = LazyLock::new(|| {
    // Anchored at both ends, so the whole header value has to match.
    let pattern = r#"^multipart\/form-data; boundary=(?<boundary>[\w'\(\)+,\-\.\/:=? ]{0,69}[\w'\(\)+,\-\.\/:=?])$"#;
    Regex::new(pattern).expect("Regex is valid")
});

/// A trait which abstracts over accessing headers from a mock request.
/// Allows future compatibility in case we switch to a different mock library.
pub trait HeaderContainer {
/// Returns the values found for the header called `header_name`, each as raw bytes.
/// A `Vec` is returned so that a header which is absent or present several
/// times can be represented.
fn header(&self, header_name: &str) -> Vec<&[u8]>;
}

impl HeaderContainer for Request {
fn header(&self, header_name: &str) -> Vec<&[u8]> {
self.header(header_name)
.iter()
.map(|h| h.as_bytes())
.collect()
}
}

/// Break a multipart/form-data body apart at its boundaries.
///
/// `body` is the raw request body and `boundary` is the boundary string as it
/// appears in the content-type header (it is regex-escaped internally).
///
/// The pieces are returned as a set rather than a list, because the chunk
/// uploading code makes no promise about the order of the chunks in the body.
/// Callers should only rely on the invariant that every expected chunk is
/// present, not on where it appears.
///
/// # Errors
///
/// Returns an error if `body` as a whole does not have the shape of a
/// multipart form delimited by `boundary`.
pub fn split_chunk_body<'b>(
    body: &'b [u8],
    boundary: &str,
) -> Result<HashSet<&'b [u8]>, Box<dyn Error>> {
    // The boundary gets interpolated into regex patterns below, so it must
    // be escaped first.
    let escaped_boundary = regex::escape(boundary);

    // Strip the opening boundary and the closing terminator, keeping only
    // what lies between them.
    let Some(captures) = entire_body_regex(&escaped_boundary).captures(body) else {
        return Err("body does not match multipart form regex".into());
    };
    let inner_body = captures
        .name("body")
        .expect("the regex has a \"body\" capture group which should always match")
        .as_bytes();

    // Collecting into a HashSet has one small drawback: duplicates collapse
    // into a single entry, so a test built on this cannot notice the same
    // chunk being sent more than once (which would be a bug). In exchange,
    // there is no need to keep track of per-chunk counts.
    let separator = boundary_regex(&escaped_boundary);
    let parts: HashSet<&'b [u8]> = separator.split(inner_body).collect();

    Ok(parts)
}

/// Read the multipart boundary out of a request's content-type header.
///
/// # Errors
///
/// Returns an error if the request does not carry exactly one content-type
/// header, if that header does not match the multipart/form-data regex, or if
/// the captured boundary is not valid UTF-8.
pub fn boundary_from_request(request: &impl HeaderContainer) -> Result<&str, Box<dyn Error>> {
    let content_type_headers = request.header("content-type");

    // Exactly one header value is acceptable; anything else is reported
    // together with the number of values that were found.
    let content_type = match content_type_headers.as_slice() {
        &[only_value] => only_value,
        other => {
            return Err(format!(
                "content-type header should be present exactly once, found {} times",
                other.len()
            )
            .into());
        }
    };

    let Some(captures) = CONTENT_TYPE_REGEX.captures(content_type) else {
        return Err("content-type does not match multipart/form-data regex".into());
    };
    let boundary = captures
        .name("boundary")
        .expect("if the regex matches, the boundary should match as well.")
        .as_bytes();

    str::from_utf8(boundary).map_err(Into::into)
}

/// Build a regex which should match the entire body of a multipart form whose
/// (already regex-escaped) boundary is `regex_escaped_boundary`.
///
/// The regex has a named capture group, "body", which holds everything between
/// the first starting boundary and the final ending boundary; the boundaries
/// themselves are not part of the group. Whitespace after the final ending
/// boundary is tolerated.
///
/// May panic if the boundary is not regex-escaped.
fn entire_body_regex(regex_escaped_boundary: &str) -> Regex {
    // Inside the "body" group, the `s` flag is enabled and the `u` flag is
    // disabled, and the repetition is non-greedy.
    let pattern = format!(
        r#"^--{regex_escaped_boundary}(?<body>(?s-u:.*?))--{regex_escaped_boundary}--\s*$"#
    );

    Regex::new(&pattern).expect("This regex should be valid")
}

/// Build a regex matching the delimiter which starts a section of a multipart
/// form, i.e. two dashes followed by the (already regex-escaped) boundary
/// `regex_escaped_boundary`.
fn boundary_regex(regex_escaped_boundary: &str) -> Regex {
    let pattern = format!(r#"--{regex_escaped_boundary}"#);

    Regex::new(&pattern).expect("This regex should be valid")
}
1 change: 1 addition & 0 deletions tests/integration/test_utils/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
//! A collection of utilities for integration tests.
pub mod chunk_upload;
pub mod env;

mod mock_common_endpoints;
Expand Down

0 comments on commit e6e8bf3

Please sign in to comment.