From 852217e8d4b859ed46908ed85a26ed71f2d6906a Mon Sep 17 00:00:00 2001 From: Arpad Borsos Date: Fri, 7 Jun 2024 10:15:49 +0200 Subject: [PATCH] Optimize and make `pyreport` output generic over `impl Write` (#5) Avoids a bunch of intermediate `format!` calls/allocations. Being generic over `impl Write` also means it can output to a `Vec` directly, instead of having to deal with temporary files in tests. As an added bonus, this wraps the actual file I/O in a `BufWriter` to reduce the number of syscalls. --- src/report/pyreport/chunks.rs | 95 ++++++++++--------------- src/report/pyreport/mod.rs | 15 +++- src/report/pyreport/report_json.rs | 109 ++++++++++------------------- 3 files changed, 84 insertions(+), 135 deletions(-) diff --git a/src/report/pyreport/chunks.rs b/src/report/pyreport/chunks.rs index b5363a7..7250f47 100644 --- a/src/report/pyreport/chunks.rs +++ b/src/report/pyreport/chunks.rs @@ -1,4 +1,4 @@ -use std::{fs::File, io::Write}; +use std::io::Write; use serde_json::{json, json_internal}; @@ -52,7 +52,7 @@ fn query_chunks_file_header(report: &SqliteReport) -> Result { /// current chunk, even if there is no data for lines 11-16. fn maybe_write_current_line( current_line: Option<(i64, JsonVal)>, - output_file: &mut File, + output: &mut impl Write, last_populated_line: i64, ) -> Result { if let Some((line_no, line_values)) = current_line { @@ -60,14 +60,13 @@ fn maybe_write_current_line( // to write any newlines. If last_populated_line is 0 (its starting value) and // we're dealing with 1 (the first line), same story. for _ in last_populated_line..line_no - 1 { - let _ = output_file.write("\n".as_bytes())?; + writeln!(output)?; } // Every line is preceded by, but not followed by, a newline. When starting a // new chunk, the cursor will be at the end of the header object on the // line before data is supposed to start. - let _ = output_file - .write(format!("\n{}", array_without_trailing_nulls(line_values)).as_bytes())?; + write!(output, "\n{}", array_without_trailing_nulls(line_values))?; Ok(line_no) } else { // We don't have a new line to print. Return the old value for @@ -219,10 +218,12 @@ fn build_datapoint_from_row(row: &rusqlite::Row) -> Result> { /// Builds a chunks file from a [`SqliteReport`] and writes it to `output_file`. /// See [`crate::report::pyreport`] for more details about the content and /// structure of a chunks file. -pub fn sql_to_chunks(report: &SqliteReport, output_file: &mut File) -> Result<()> { +pub fn sql_to_chunks(report: &SqliteReport, output: &mut impl Write) -> Result<()> { let chunks_file_header = query_chunks_file_header(report)?; - let _ = output_file - .write(format!("{}{}", chunks_file_header, CHUNKS_FILE_HEADER_TERMINATOR).as_bytes())?; + write!( + output, + "{chunks_file_header}{CHUNKS_FILE_HEADER_TERMINATOR}" + )?; // TODO: query from chunk_indices rather than samples in case there are chunks // with no samples? @@ -252,7 +253,7 @@ pub fn sql_to_chunks(report: &SqliteReport, output_file: &mut File) -> Result<() }; if is_new_chunk || is_new_line { last_populated_line = - maybe_write_current_line(current_report_line, output_file, last_populated_line)?; + maybe_write_current_line(current_report_line, output, last_populated_line)?; current_report_line = Some(build_report_line_from_row(row)?); if is_new_chunk { @@ -267,14 +268,12 @@ pub fn sql_to_chunks(report: &SqliteReport, output_file: &mut File) -> Result<() } else { CHUNKS_FILE_END_OF_CHUNK }; - let _ = output_file.write( - format!( - "{}{}", - delimiter, - json!({"present_sessions": present_sessions}) - ) - .as_bytes(), + write!( + output, + "{delimiter}{}", + json!({"present_sessions": present_sessions}) )?; + current_chunk = Some(chunk_index); last_populated_line = 0; } @@ -307,17 +306,14 @@ pub fn sql_to_chunks(report: &SqliteReport, output_file: &mut File) -> Result<() // The loop writes each line when it gets to the first row from the next line. // There are no rows following the last line, so we have to manually write // it here. - maybe_write_current_line(current_report_line, output_file, last_populated_line)?; + maybe_write_current_line(current_report_line, output, last_populated_line)?; Ok(()) } #[cfg(test)] mod tests { - use std::{ - io::{Read, Seek}, - path::PathBuf, - }; + use std::path::PathBuf; use serde_json::{json, json_internal}; use tempfile::TempDir; @@ -895,34 +891,25 @@ mod tests { #[test] fn test_maybe_write_current_line() { - let ctx = setup(); - - let test_cases = [ - (None, 1, 1, ""), - (None, 2, 2, ""), - (Some((3, json!(["foo"]))), 1, 3, "\n\n[\"foo\"]"), - (Some((7, json!(["foo"]))), 5, 7, "\n\n[\"foo\"]"), - (Some((7, json!(["foo"]))), 7, 7, "\n[\"foo\"]"), + let test_cases: [(_, _, _, &[u8]); 6] = [ + (None, 1, 1, b""), + (None, 2, 2, b""), + (Some((3, json!(["foo"]))), 1, 3, b"\n\n[\"foo\"]"), + (Some((7, json!(["foo"]))), 5, 7, b"\n\n[\"foo\"]"), + (Some((7, json!(["foo"]))), 7, 7, b"\n[\"foo\"]"), // Shouldn't happen, just documenting behavior - (Some((7, json!(["foo"]))), 8, 7, "\n[\"foo\"]"), + (Some((7, json!(["foo"]))), 8, 7, b"\n[\"foo\"]"), ]; - for test_case in test_cases { - let mut dummy_file = File::options() - .create(true) - .truncate(true) - .read(true) - .write(true) - .open(ctx.temp_dir.path().join("dummy.txt")) - .unwrap(); - + for (current_line, previous_last_populated, expected_last_populated, expected_output) in + test_cases + { + let mut output = Vec::new(); let last_populated_line = - maybe_write_current_line(test_case.0, &mut dummy_file, test_case.1).unwrap(); - assert_eq!(last_populated_line, test_case.2); + maybe_write_current_line(current_line, &mut output, previous_last_populated) + .unwrap(); + assert_eq!(last_populated_line, expected_last_populated); - let _ = dummy_file.rewind(); - let mut file_str = String::new(); - let _ = dummy_file.read_to_string(&mut file_str); - assert_eq!(file_str, test_case.3); + assert_eq!(&output, expected_output); } } @@ -945,20 +932,10 @@ mod tests { fn test_sql_to_chunks() { let ctx = setup(); let report = build_sample_report(ctx.temp_dir.path().join("db.sqlite")).unwrap(); - let chunks_path = ctx.temp_dir.path().join("chunks.txt"); - let mut chunks_file = File::options() - .create(true) - .truncate(true) - .read(true) - .write(true) - .open(&chunks_path) - .unwrap(); - - sql_to_chunks(&report, &mut chunks_file).unwrap(); - - let mut chunks = String::new(); - let _ = chunks_file.rewind().unwrap(); - let _ = chunks_file.read_to_string(&mut chunks).unwrap(); + + let mut chunks = Vec::new(); + sql_to_chunks(&report, &mut chunks).unwrap(); + let chunks = String::from_utf8(chunks).unwrap(); let chunks_header = json!({"labels_index": {"1": "test-case", "2": "test-case 2"}}); // line_1 variable in build_sample_report() diff --git a/src/report/pyreport/mod.rs b/src/report/pyreport/mod.rs index 706dc2f..1d92de9 100644 --- a/src/report/pyreport/mod.rs +++ b/src/report/pyreport/mod.rs @@ -256,7 +256,10 @@ * - [`CoverageDatapoint`](https://github.com/codecov/shared/blob/f6c2c3852530192ab0c6b9fd0c0a800c2cbdb16f/shared/reports/types.py#L98) */ -use std::fs::File; +use std::{ + fs::File, + io::{BufWriter, Write}, +}; use super::SqliteReport; use crate::error::Result; @@ -276,8 +279,14 @@ pub trait ToPyreport { impl ToPyreport for SqliteReport { fn to_pyreport(&self, report_json_file: &mut File, chunks_file: &mut File) -> Result<()> { - report_json::sql_to_report_json(self, report_json_file)?; - chunks::sql_to_chunks(self, chunks_file)?; + let mut writer = BufWriter::new(report_json_file); + report_json::sql_to_report_json(self, &mut writer)?; + writer.flush()?; + + let mut writer = BufWriter::new(chunks_file); + chunks::sql_to_chunks(self, &mut writer)?; + writer.flush()?; + Ok(()) } } diff --git a/src/report/pyreport/report_json.rs b/src/report/pyreport/report_json.rs index 295b121..c801338 100644 --- a/src/report/pyreport/report_json.rs +++ b/src/report/pyreport/report_json.rs @@ -1,4 +1,4 @@ -use std::{fs::File, io::Write}; +use std::io::Write; use serde_json::{json, json_internal}; @@ -35,7 +35,7 @@ fn calculate_coverage_pct(hits: i64, lines: i64) -> String { /// /// See [`crate::report::pyreport`] for more details about the content and /// structure of a report JSON. -fn sql_to_files_dict(report: &SqliteReport, output_file: &mut File) -> Result<()> { +fn sql_to_files_dict(report: &SqliteReport, output: &mut impl Write) -> Result<()> { let mut stmt = report .conn .prepare_cached(include_str!("queries/files_to_report_json.sql"))?; @@ -43,13 +43,12 @@ fn sql_to_files_dict(report: &SqliteReport, output_file: &mut File) -> Result<() fn maybe_write_current_file( current_file: &Option<(String, JsonVal)>, - output_file: &mut File, + output: &mut impl Write, first_file: bool, ) -> Result<()> { if let Some((current_path, current_file)) = ¤t_file { let delimiter = if first_file { "" } else { "," }; - let _ = output_file - .write(format!("{}\"{}\": {}", delimiter, current_path, current_file).as_bytes())?; + write!(output, "{delimiter}\"{current_path}\": {current_file}")?; } Ok(()) } @@ -126,7 +125,7 @@ fn sql_to_files_dict(report: &SqliteReport, output_file: &mut File) -> Result<() // Write the "files" key to the output file and build its value by iterating // over our query results. It's the caller's responsibility to write // surroundings {}s or ,s as needed. - let _ = output_file.write("\"files\": {".as_bytes())?; + write!(output, "\"files\": {{")?; // Each row in our query results corresponds to a single session, and a file can // have several sessions. We build up the current file over many rows, and @@ -147,7 +146,7 @@ fn sql_to_files_dict(report: &SqliteReport, output_file: &mut File) -> Result<() // effectively no-op `maybe_write_current_file()` and leave `first_file` // as `false` in that case and then begin building the first file. if is_new_file { - maybe_write_current_file(¤t_file, output_file, first_file)?; + maybe_write_current_file(¤t_file, output, first_file)?; first_file = current_file.is_none(); current_file = Some(build_file_from_row(row)?); } @@ -179,9 +178,9 @@ fn sql_to_files_dict(report: &SqliteReport, output_file: &mut File) -> Result<() // The loop writes each file when it gets to the first row from the next file. // There are no rows following the last file, so we have to manually write // it here. - maybe_write_current_file(¤t_file, output_file, first_file)?; + maybe_write_current_file(¤t_file, output, first_file)?; - let _ = output_file.write("}".as_bytes())?; + write!(output, "}}")?; Ok(()) } @@ -202,7 +201,7 @@ fn sql_to_files_dict(report: &SqliteReport, output_file: &mut File) -> Result<() /// /// See [`crate::report::pyreport`] for more details about the content and /// structure of a report JSON. -fn sql_to_sessions_dict(report: &SqliteReport, output_file: &mut File) -> Result<()> { +fn sql_to_sessions_dict(report: &SqliteReport, output: &mut impl Write) -> Result<()> { let mut stmt = report .conn .prepare_cached(include_str!("queries/sessions_to_report_json.sql"))?; @@ -293,37 +292,35 @@ fn sql_to_sessions_dict(report: &SqliteReport, output_file: &mut File) -> Result // Write the "sessions" key to the output file and build its value by iterating // over our query results. It's the caller's responsibility to write // surroundings {}s or ,s as needed. - let _ = output_file.write("\"sessions\": {".as_bytes())?; + write!(output, "\"sessions\": {{")?; let mut first_session = true; while let Some(row) = rows.next()? { let (session_id, session) = build_session_from_row(row)?; // No preceding , for the first session we write let delimiter = if first_session { "" } else { "," }; - let _ = output_file - .write(format!("{}\"{}\": {}", delimiter, session_id, session).as_bytes())?; + write!(output, "{delimiter}\"{session_id}\": {session}")?; first_session = false; } - - let _ = output_file.write("}".as_bytes())?; + write!(output, "}}")?; Ok(()) } /// Builds a report JSON from a [`SqliteReport`] and writes it to `output_file`. /// See [`crate::report::pyreport`] for more details about the content and /// structure of a report JSON. -pub fn sql_to_report_json(report: &SqliteReport, output_file: &mut File) -> Result<()> { - let _ = output_file.write("{".as_bytes())?; - sql_to_files_dict(report, output_file)?; - let _ = output_file.write(",".as_bytes())?; - sql_to_sessions_dict(report, output_file)?; - let _ = output_file.write("}".as_bytes())?; +pub fn sql_to_report_json(report: &SqliteReport, output: &mut impl Write) -> Result<()> { + write!(output, "{{")?; + sql_to_files_dict(report, output)?; + write!(output, ",")?; + sql_to_sessions_dict(report, output)?; + write!(output, "}}")?; Ok(()) } #[cfg(test)] mod tests { - use std::{io::Seek, path::PathBuf}; + use std::path::PathBuf; use serde_json::{json, json_internal}; use tempfile::TempDir; @@ -616,21 +613,13 @@ mod tests { fn test_sql_to_files_dict() { let ctx = setup(); let report = build_sample_report(ctx.temp_dir.path().join("db.sqlite")).unwrap(); - let report_json_path = ctx.temp_dir.path().join("report_json.json"); - let mut report_json_file = File::options() - .create(true) - .truncate(true) - .read(true) - .write(true) - .open(&report_json_path) - .unwrap(); - let _ = report_json_file.write("{".as_bytes()).unwrap(); - sql_to_files_dict(&report, &mut report_json_file).unwrap(); - let _ = report_json_file.write("}".as_bytes()).unwrap(); + let mut files_output = Vec::new(); + files_output.push(b'{'); + sql_to_files_dict(&report, &mut files_output).unwrap(); + files_output.push(b'}'); - let _ = report_json_file.rewind().unwrap(); - let files_dict: JsonVal = serde_json::from_reader(&report_json_file).unwrap(); + let files_dict: JsonVal = serde_json::from_slice(&files_output).unwrap(); let expected = json!({ "files": { @@ -698,28 +687,20 @@ mod tests { } }); - assert_eq!(files_dict, expected,); + assert_eq!(files_dict, expected); } #[test] fn test_sql_to_sessions_dict() { let ctx = setup(); let report = build_sample_report(ctx.temp_dir.path().join("db.sqlite")).unwrap(); - let report_json_path = ctx.temp_dir.path().join("report_json.json"); - let mut report_json_file = File::options() - .create(true) - .truncate(true) - .read(true) - .write(true) - .open(&report_json_path) - .unwrap(); - let _ = report_json_file.write("{".as_bytes()).unwrap(); - sql_to_sessions_dict(&report, &mut report_json_file).unwrap(); - let _ = report_json_file.write("}".as_bytes()).unwrap(); + let mut sessions_output = Vec::new(); + sessions_output.push(b'{'); + sql_to_sessions_dict(&report, &mut sessions_output).unwrap(); + sessions_output.push(b'}'); - let _ = report_json_file.rewind().unwrap(); - let sessions_dict: JsonVal = serde_json::from_reader(&report_json_file).unwrap(); + let sessions_dict: JsonVal = serde_json::from_slice(&sessions_output).unwrap(); let expected = json!({ "sessions": { @@ -791,19 +772,10 @@ mod tests { fn test_sql_to_report_json() { let ctx = setup(); let report = build_sample_report(ctx.temp_dir.path().join("db.sqlite")).unwrap(); - let report_json_path = ctx.temp_dir.path().join("report_json.json"); - let mut report_json_file = File::options() - .create(true) - .truncate(true) - .read(true) - .write(true) - .open(&report_json_path) - .unwrap(); - sql_to_report_json(&report, &mut report_json_file).unwrap(); - - let _ = report_json_file.rewind().unwrap(); - let report_json: JsonVal = serde_json::from_reader(&report_json_file).unwrap(); + let mut report_output = Vec::new(); + sql_to_report_json(&report, &mut report_output).unwrap(); + let report_json: JsonVal = serde_json::from_slice(&report_output).unwrap(); // All of the totals are the same as in previous test cases so they have been // collapsed/uncommented for brevity @@ -866,19 +838,10 @@ mod tests { assert_eq!(report_json, expected); let empty_report = SqliteReport::new(ctx.temp_dir.path().join("empty.db")).unwrap(); - let report_json_path = ctx.temp_dir.path().join("report_json.json"); - let mut report_json_file = File::options() - .create(true) - .truncate(true) - .read(true) - .write(true) - .open(&report_json_path) - .unwrap(); - - sql_to_report_json(&empty_report, &mut report_json_file).unwrap(); - let _ = report_json_file.rewind().unwrap(); - let report_json: JsonVal = serde_json::from_reader(&report_json_file).unwrap(); + let mut report_output = Vec::new(); + sql_to_report_json(&empty_report, &mut report_output).unwrap(); + let report_json: JsonVal = serde_json::from_slice(&report_output).unwrap(); let expected = json!({"files": {}, "sessions": {}}); assert_eq!(report_json, expected);