Optimize and make pyreport output generic over impl Write (#5)

Avoids a bunch of intermediate `format!` calls/allocations. Being generic over `impl Write` also means it can output to a `Vec` directly, instead of having to deal with temporary files in tests. As an added bonus, this wraps the actual file I/O in a `BufWriter` to reduce the number of syscalls.
codecov · Jun 7, 2024 · 852217e · 852217e
1 parent d986d4d
commit 852217e
Show file tree

Hide file tree

Showing 3 changed files with 84 additions and 135 deletions.
diff --git a/src/report/pyreport/chunks.rs b/src/report/pyreport/chunks.rs
@@ -1,4 +1,4 @@
-use std::{fs::File, io::Write};
+use std::io::Write;
 
 use serde_json::{json, json_internal};
 
@@ -52,22 +52,21 @@ fn query_chunks_file_header(report: &SqliteReport) -> Result<JsonVal> {
 /// current chunk, even if there is no data for lines 11-16.
 fn maybe_write_current_line(
     current_line: Option<(i64, JsonVal)>,
-    output_file: &mut File,
+    output: &mut impl Write,
     last_populated_line: i64,
 ) -> Result<i64> {
     if let Some((line_no, line_values)) = current_line {
         // If last_populated_line is 6 and we're dealing with 7, this loop does not have
         // to write any newlines. If last_populated_line is 0 (its starting value) and
         // we're dealing with 1 (the first line), same story.
         for _ in last_populated_line..line_no - 1 {
-            let _ = output_file.write("\n".as_bytes())?;
+            writeln!(output)?;
         }
 
         // Every line is preceded by, but not followed by, a newline. When starting a
         // new chunk, the cursor will be at the end of the header object on the
         // line before data is supposed to start.
-        let _ = output_file
-            .write(format!("\n{}", array_without_trailing_nulls(line_values)).as_bytes())?;
+        write!(output, "\n{}", array_without_trailing_nulls(line_values))?;
         Ok(line_no)
     } else {
         // We don't have a new line to print. Return the old value for
@@ -219,10 +218,12 @@ fn build_datapoint_from_row(row: &rusqlite::Row) -> Result<Option<JsonVal>> {
 /// Builds a chunks file from a [`SqliteReport`] and writes it to `output_file`.
 /// See [`crate::report::pyreport`] for more details about the content and
 /// structure of a chunks file.
-pub fn sql_to_chunks(report: &SqliteReport, output_file: &mut File) -> Result<()> {
+pub fn sql_to_chunks(report: &SqliteReport, output: &mut impl Write) -> Result<()> {
     let chunks_file_header = query_chunks_file_header(report)?;
-    let _ = output_file
-        .write(format!("{}{}", chunks_file_header, CHUNKS_FILE_HEADER_TERMINATOR).as_bytes())?;
+    write!(
+        output,
+        "{chunks_file_header}{CHUNKS_FILE_HEADER_TERMINATOR}"
+    )?;
 
     // TODO: query from chunk_indices rather than samples in case there are chunks
     // with no samples?
@@ -252,7 +253,7 @@ pub fn sql_to_chunks(report: &SqliteReport, output_file: &mut File) -> Result<()
         };
         if is_new_chunk || is_new_line {
             last_populated_line =
-                maybe_write_current_line(current_report_line, output_file, last_populated_line)?;
+                maybe_write_current_line(current_report_line, output, last_populated_line)?;
             current_report_line = Some(build_report_line_from_row(row)?);
 
             if is_new_chunk {
@@ -267,14 +268,12 @@ pub fn sql_to_chunks(report: &SqliteReport, output_file: &mut File) -> Result<()
                 } else {
                     CHUNKS_FILE_END_OF_CHUNK
                 };
-                let _ = output_file.write(
-                    format!(
-                        "{}{}",
-                        delimiter,
-                        json!({"present_sessions": present_sessions})
-                    )
-                    .as_bytes(),
+                write!(
+                    output,
+                    "{delimiter}{}",
+                    json!({"present_sessions": present_sessions})
                 )?;
+
                 current_chunk = Some(chunk_index);
                 last_populated_line = 0;
             }
@@ -307,17 +306,14 @@ pub fn sql_to_chunks(report: &SqliteReport, output_file: &mut File) -> Result<()
     // The loop writes each line when it gets to the first row from the next line.
     // There are no rows following the last line, so we have to manually write
     // it here.
-    maybe_write_current_line(current_report_line, output_file, last_populated_line)?;
+    maybe_write_current_line(current_report_line, output, last_populated_line)?;
 
     Ok(())
 }
 
 #[cfg(test)]
 mod tests {
-    use std::{
-        io::{Read, Seek},
-        path::PathBuf,
-    };
+    use std::path::PathBuf;
 
     use serde_json::{json, json_internal};
     use tempfile::TempDir;
@@ -895,34 +891,25 @@ mod tests {
 
     #[test]
     fn test_maybe_write_current_line() {
-        let ctx = setup();
-
-        let test_cases = [
-            (None, 1, 1, ""),
-            (None, 2, 2, ""),
-            (Some((3, json!(["foo"]))), 1, 3, "\n\n[\"foo\"]"),
-            (Some((7, json!(["foo"]))), 5, 7, "\n\n[\"foo\"]"),
-            (Some((7, json!(["foo"]))), 7, 7, "\n[\"foo\"]"),
+        let test_cases: [(_, _, _, &[u8]); 6] = [
+            (None, 1, 1, b""),
+            (None, 2, 2, b""),
+            (Some((3, json!(["foo"]))), 1, 3, b"\n\n[\"foo\"]"),
+            (Some((7, json!(["foo"]))), 5, 7, b"\n\n[\"foo\"]"),
+            (Some((7, json!(["foo"]))), 7, 7, b"\n[\"foo\"]"),
             // Shouldn't happen, just documenting behavior
-            (Some((7, json!(["foo"]))), 8, 7, "\n[\"foo\"]"),
+            (Some((7, json!(["foo"]))), 8, 7, b"\n[\"foo\"]"),
         ];
-        for test_case in test_cases {
-            let mut dummy_file = File::options()
-                .create(true)
-                .truncate(true)
-                .read(true)
-                .write(true)
-                .open(ctx.temp_dir.path().join("dummy.txt"))
-                .unwrap();
-
+        for (current_line, previous_last_populated, expected_last_populated, expected_output) in
+            test_cases
+        {
+            let mut output = Vec::new();
             let last_populated_line =
-                maybe_write_current_line(test_case.0, &mut dummy_file, test_case.1).unwrap();
-            assert_eq!(last_populated_line, test_case.2);
+                maybe_write_current_line(current_line, &mut output, previous_last_populated)
+                    .unwrap();
+            assert_eq!(last_populated_line, expected_last_populated);
 
-            let _ = dummy_file.rewind();
-            let mut file_str = String::new();
-            let _ = dummy_file.read_to_string(&mut file_str);
-            assert_eq!(file_str, test_case.3);
+            assert_eq!(&output, expected_output);
         }
     }
 
@@ -945,20 +932,10 @@ mod tests {
     fn test_sql_to_chunks() {
         let ctx = setup();
         let report = build_sample_report(ctx.temp_dir.path().join("db.sqlite")).unwrap();
-        let chunks_path = ctx.temp_dir.path().join("chunks.txt");
-        let mut chunks_file = File::options()
-            .create(true)
-            .truncate(true)
-            .read(true)
-            .write(true)
-            .open(&chunks_path)
-            .unwrap();
-
-        sql_to_chunks(&report, &mut chunks_file).unwrap();
-
-        let mut chunks = String::new();
-        let _ = chunks_file.rewind().unwrap();
-        let _ = chunks_file.read_to_string(&mut chunks).unwrap();
+
+        let mut chunks = Vec::new();
+        sql_to_chunks(&report, &mut chunks).unwrap();
+        let chunks = String::from_utf8(chunks).unwrap();
 
         let chunks_header = json!({"labels_index": {"1": "test-case", "2": "test-case 2"}});
         // line_1 variable in build_sample_report()

diff --git a/src/report/pyreport/mod.rs b/src/report/pyreport/mod.rs
@@ -256,7 +256,10 @@
  * - [`CoverageDatapoint`](https://github.com/codecov/shared/blob/f6c2c3852530192ab0c6b9fd0c0a800c2cbdb16f/shared/reports/types.py#L98)
  */
 
-use std::fs::File;
+use std::{
+    fs::File,
+    io::{BufWriter, Write},
+};
 
 use super::SqliteReport;
 use crate::error::Result;
@@ -276,8 +279,14 @@ pub trait ToPyreport {
 
 impl ToPyreport for SqliteReport {
     fn to_pyreport(&self, report_json_file: &mut File, chunks_file: &mut File) -> Result<()> {
-        report_json::sql_to_report_json(self, report_json_file)?;
-        chunks::sql_to_chunks(self, chunks_file)?;
+        let mut writer = BufWriter::new(report_json_file);
+        report_json::sql_to_report_json(self, &mut writer)?;
+        writer.flush()?;
+
+        let mut writer = BufWriter::new(chunks_file);
+        chunks::sql_to_chunks(self, &mut writer)?;
+        writer.flush()?;
+
         Ok(())
     }
 }