test(chunk-upload): Add a test for uploading multiple debug files (#2274)

This test ensures that the correct chunks are sent to the server when multiple debug files are being uploaded. Note that our chunk uploading code does not guarantee that the chunks appear in any particular order within the request; only the invariant that all chunks get uploaded (in any arbitrary order) is guaranteed. Because of this, we need to parse the request body into an unordered set of all chunks and compare it against the set of chunks we expect to receive. ref #2194
getsentry · Nov 27, 2024 · e6e8bf3 · e6e8bf3
1 parent 04cdedf
commit e6e8bf3
Show file tree

Hide file tree

Showing 9 changed files with 207 additions and 2 deletions.
diff --git a/tests/integration/_expected_requests/debug_files/upload/chunk_upload_multiple_files.bin b/tests/integration/_expected_requests/debug_files/upload/chunk_upload_multiple_files.bin
diff --git a/.../integration/_fixtures/debug_files/upload/chunk_upload_multiple_files/README.md b/.../integration/_fixtures/debug_files/upload/chunk_upload_multiple_files/README.md
@@ -0,0 +1,2 @@
+This directory contains a set of three macOS Rust executables, compiled as a debug build, and thus containing debug information.
+We use these executables to test the chunk upload functionality of the Sentry CLI.
diff --git a/tests/integration/_fixtures/debug_files/upload/chunk_upload_multiple_files/fibonacci b/tests/integration/_fixtures/debug_files/upload/chunk_upload_multiple_files/fibonacci
diff --git a/tests/integration/_fixtures/debug_files/upload/chunk_upload_multiple_files/fibonacci-fast b/tests/integration/_fixtures/debug_files/upload/chunk_upload_multiple_files/fibonacci-fast
diff --git a/tests/integration/_fixtures/debug_files/upload/chunk_upload_multiple_files/main b/tests/integration/_fixtures/debug_files/upload/chunk_upload_multiple_files/main
diff --git a/tests/integration/debug_files/upload.rs b/tests/integration/debug_files/upload.rs
@@ -4,7 +4,7 @@ use std::{fs, str};
 
 use regex::bytes::Regex;
 
-use crate::integration::{AssertCommand, MockEndpointBuilder, TestManager};
+use crate::integration::{chunk_upload, AssertCommand, MockEndpointBuilder, TestManager};
 
 /// This regex is used to extract the boundary from the content-type header.
 /// We need to match the boundary, since it changes with each request.
@@ -285,3 +285,98 @@ fn ensure_correct_chunk_upload() {
         .with_default_token()
         .run_and_assert(AssertCommand::Success);
 }
+
+/// Verifies that uploading several debug files in one invocation sends the
+/// expected set of chunks.
+///
+/// The uploaded request body is compared against a recorded request as an
+/// unordered set of chunks, so the position of a chunk within the body is
+/// irrelevant to the outcome.
+#[test]
+fn chunk_upload_multiple_files() {
+    // Boundary used in the recorded request file. It was randomly generated
+    // when that request was recorded.
+    const RECORDED_BOUNDARY: &str = "------------------------b26LKrHFvpOPfwMoDhYNY8";
+    // Assemble response for the first call: every entry is `not_found` and
+    // lists a missing chunk.
+    const ASSEMBLE_NOT_FOUND: &str = r#"{
+                        "6e217f035ed538d4d6c14129baad5cb52e680e74": {
+                            "state": "not_found",
+                            "missingChunks": ["6e217f035ed538d4d6c14129baad5cb52e680e74"]
+                        },
+                        "500848b7815119669a292f2ae1f44af11d7aa2d3": {
+                            "state": "not_found",
+                            "missingChunks": ["500848b7815119669a292f2ae1f44af11d7aa2d3"]
+                        },
+                        "fc27d95861d56fe16a2b66150e31652b76e8c678": {
+                            "state": "not_found",
+                            "missingChunks": ["fc27d95861d56fe16a2b66150e31652b76e8c678"]
+                        }
+                    }"#;
+    // Assemble response for every later call: every entry is `created` and
+    // no chunks are missing.
+    const ASSEMBLE_CREATED: &str = r#"{
+                        "6e217f035ed538d4d6c14129baad5cb52e680e74": {
+                            "state": "created",
+                            "missingChunks": []
+                        },
+                        "500848b7815119669a292f2ae1f44af11d7aa2d3": {
+                            "state": "created",
+                            "missingChunks": []
+                        },
+                        "fc27d95861d56fe16a2b66150e31652b76e8c678": {
+                            "state": "created",
+                            "missingChunks": []
+                        }
+                    }"#;
+
+    let recorded_body = fs::read(
+        "tests/integration/_expected_requests/debug_files/upload/chunk_upload_multiple_files.bin",
+    )
+    .expect("expected chunk body file should be present");
+    let assemble_not_yet_called = AtomicBool::new(true);
+
+    TestManager::new()
+        .mock_endpoint(
+            MockEndpointBuilder::new("GET", "/api/0/organizations/wat-org/chunk-upload/")
+                .with_response_file("debug_files/get-chunk-upload.json"),
+        )
+        .mock_endpoint(
+            MockEndpointBuilder::new("POST", "/api/0/organizations/wat-org/chunk-upload/")
+                .with_response_fn(move |request| {
+                    // The boundary of the live request differs from the recorded
+                    // one, so it is read from the request's own header.
+                    let uploaded_boundary = chunk_upload::boundary_from_request(request)
+                        .expect("content-type header should be a valid multipart/form-data header");
+                    let uploaded_body = request.body().expect("body should be readable");
+
+                    let uploaded = chunk_upload::split_chunk_body(uploaded_body, uploaded_boundary)
+                        .expect("body should be a valid multipart/form-data body");
+                    let recorded =
+                        chunk_upload::split_chunk_body(&recorded_body, RECORDED_BOUNDARY)
+                            .expect("expected chunk body is a valid multipart/form-data body");
+
+                    // Plain `assert!` on purpose: on failure, `assert_eq!` would
+                    // print both chunk sets, which is too long to be useful.
+                    assert!(
+                        uploaded == recorded,
+                        "Uploaded chunks differ from the expected chunks"
+                    );
+
+                    Vec::new()
+                }),
+        )
+        .mock_endpoint(
+            MockEndpointBuilder::new(
+                "POST",
+                "/api/0/projects/wat-org/wat-project/files/difs/assemble/",
+            )
+            .with_header_matcher("content-type", "application/json")
+            .with_response_fn(move |_| {
+                // `swap` returns the previous value, so this is true only for
+                // the very first assemble request.
+                let is_first_call = assemble_not_yet_called.swap(false, Ordering::Relaxed);
+                let response = if is_first_call {
+                    ASSEMBLE_NOT_FOUND
+                } else {
+                    ASSEMBLE_CREATED
+                };
+
+                response.into()
+            })
+            .expect(2),
+        )
+        .assert_cmd(vec![
+            "debug-files",
+            "upload",
+            "tests/integration/_fixtures/debug_files/upload/chunk_upload_multiple_files",
+        ])
+        .with_default_token()
+        .run_and_assert(AssertCommand::Success);
+}
diff --git a/tests/integration/mod.rs b/tests/integration/mod.rs
@@ -31,7 +31,7 @@ use std::io;
 use std::path::Path;
 
 use test_utils::MockEndpointBuilder;
-use test_utils::{env, AssertCommand, ChunkOptions, ServerBehavior, TestManager};
+use test_utils::{chunk_upload, env, AssertCommand, ChunkOptions, ServerBehavior, TestManager};
 
 pub const UTC_DATE_FORMAT: &str = r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}.\d{6,9}Z";
 const VERSION: &str = env!("CARGO_PKG_VERSION");

diff --git a/tests/integration/test_utils/chunk_upload.rs b/tests/integration/test_utils/chunk_upload.rs
@@ -0,0 +1,107 @@
+//! Utilities for chunk upload tests.
+use std::collections::HashSet;
+use std::error::Error;
+use std::str;
+use std::sync::LazyLock;
+
+use mockito::Request;
+use regex::bytes::Regex;
+
+/// Matches a `multipart/form-data` content-type header and captures its boundary.
+///
+/// The boundary is exposed through the `boundary` capture group. It has to be
+/// matched rather than hard-coded because it changes with each request. The
+/// accepted boundary format is the one specified in
+/// <https://www.w3.org/Protocols/rfc1341/7_2_Multipart.html>.
+static CONTENT_TYPE_REGEX: LazyLock<Regex> = LazyLock::new(|| {
+    let pattern = r#"^multipart\/form-data; boundary=(?<boundary>[\w'\(\)+,\-\.\/:=? ]{0,69}[\w'\(\)+,\-\.\/:=?])$"#;
+
+    Regex::new(pattern).expect("Regex is valid")
+});
+
+/// Abstracts over reading the values of a named header, as raw bytes, from a mock request.
+/// Keeps these helpers independent of the mock library, in case we switch to a different one.
+pub trait HeaderContainer {
+    fn header(&self, header_name: &str) -> Vec<&[u8]>;
+}
+
+impl HeaderContainer for Request {
+    /// Collects the raw bytes of every value the request carries for `header_name`.
+    fn header(&self, header_name: &str) -> Vec<&[u8]> {
+        // The path call targets the request type's own `header` accessor;
+        // each value it returns is then viewed as a byte slice.
+        let values = Request::header(self, header_name);
+
+        values.into_iter().map(|value| value.as_bytes()).collect()
+    }
+}
+
+/// Splits a multipart/form-data body into the set of chunks it contains.
+///
+/// A set is returned because the chunk uploading code gives no guarantee about
+/// the order of chunks within the body; tests should only check that every
+/// expected chunk is present, not where it appears.
+///
+/// # Errors
+///
+/// Returns an error if `body` does not have the shape of a multipart form
+/// delimited by `boundary`.
+pub fn split_chunk_body<'b>(
+    body: &'b [u8],
+    boundary: &str,
+) -> Result<HashSet<&'b [u8]>, Box<dyn Error>> {
+    let escaped_boundary = regex::escape(boundary);
+
+    // Strip the opening and the closing boundary, keeping only what lies between.
+    let whole_body = entire_body_regex(&escaped_boundary);
+    let captures = whole_body
+        .captures(body)
+        .ok_or("body does not match multipart form regex")?;
+    let inner_body = captures
+        .name("body")
+        .expect("the regex has a \"body\" capture group which should always match")
+        .as_bytes();
+
+    // Collecting into a `HashSet` drops duplicates, so a chunk that is wrongly
+    // included several times (which would be a bug) goes unnoticed by tests.
+    // That small blind spot is accepted in exchange for not having to keep
+    // track of per-chunk counts.
+    let separator = boundary_regex(&escaped_boundary);
+    let chunks: HashSet<&'b [u8]> = separator.split(inner_body).collect();
+
+    Ok(chunks)
+}
+
+/// Extracts the boundary from the content-type header of a multipart/form-data request.
+///
+/// # Errors
+///
+/// Returns an error if the content-type header is not present exactly once,
+/// if its value does not match the multipart/form-data regex, or if the
+/// boundary is not valid UTF-8.
+pub fn boundary_from_request(request: &impl HeaderContainer) -> Result<&str, Box<dyn Error>> {
+    let headers = request.header("content-type");
+
+    // Exactly one header value is acceptable; anything else is reported with
+    // the number of values that were actually found.
+    let &[content_type] = headers.as_slice() else {
+        let message = format!(
+            "content-type header should be present exactly once, found {} times",
+            headers.len()
+        );
+        return Err(message.into());
+    };
+
+    let captures = CONTENT_TYPE_REGEX
+        .captures(content_type)
+        .ok_or("content-type does not match multipart/form-data regex")?;
+    let boundary_bytes = captures
+        .name("boundary")
+        .expect("if the regex matches, the boundary should match as well.")
+        .as_bytes();
+
+    let boundary = str::from_utf8(boundary_bytes)?;
+    Ok(boundary)
+}
+
+/// Builds a regex which should match the entire body of a multipart form.
+///
+/// `regex_escaped_boundary` is the form's boundary, already regex-escaped.
+/// The returned regex has a capture group named "body" that spans everything
+/// from the first starting boundary to the final ending boundary, without
+/// including either boundary.
+///
+/// # Panics
+///
+/// May panic if the boundary is not regex-escaped.
+fn entire_body_regex(regex_escaped_boundary: &str) -> Regex {
+    let pattern = format!(
+        r#"^--{regex_escaped_boundary}(?<body>(?s-u:.*?))--{regex_escaped_boundary}--\s*$"#
+    );
+
+    Regex::new(&pattern).expect("This regex should be valid")
+}
+
+/// Builds a regex matching the start of a section of a multipart form:
+/// two dashes followed by the given, already regex-escaped, boundary.
+fn boundary_regex(regex_escaped_boundary: &str) -> Regex {
+    let pattern = format!(r#"--{regex_escaped_boundary}"#);
+
+    Regex::new(&pattern).expect("This regex should be valid")
+}
diff --git a/tests/integration/test_utils/mod.rs b/tests/integration/test_utils/mod.rs
@@ -1,5 +1,6 @@
 //! A collection of utilities for integration tests.
 
+pub mod chunk_upload;
 pub mod env;
 
 mod mock_common_endpoints;
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		This directory contains a set of three macOS Rust executables, compiled as a debug build, and thus containing debug information.
		We use these executables to test the chunk upload functionality of the Sentry CLI.