From 452d71bc731731bf5ffb29eb81fa75be4a40d88a Mon Sep 17 00:00:00 2001
From: Arpad Borsos
Date: Tue, 3 Sep 2024 12:47:51 +0200
Subject: [PATCH 1/9] Start parsing the `chunks` file with serde

This implements a hand-written parser which scans through the `chunks`
file line by line and parses the various headers and line records with
serde.

The most complex part here is parsing the line records. If that
complexity becomes unreasonable, a hybrid approach is also possible:
keep the hand-written parser and the simpler serde-based `header`
parsers, but fall back to the existing parser-combinator based parser
for the line records.
---
 core/benches/pyreport.rs                  |  64 +++-
 core/src/parsers/pyreport/chunks_serde.rs | 375 ++++++++++++++++++++++
 core/src/parsers/pyreport/mod.rs          |   4 +-
 3 files changed, 439 insertions(+), 4 deletions(-)
 create mode 100644 core/src/parsers/pyreport/chunks_serde.rs

diff --git a/core/benches/pyreport.rs b/core/benches/pyreport.rs
index 20bd3ba..70b6759 100644
--- a/core/benches/pyreport.rs
+++ b/core/benches/pyreport.rs
@@ -1,7 +1,7 @@
 use std::collections::HashMap;
 
 use codecov_rs::{
-    parsers::pyreport::{chunks, report_json},
+    parsers::pyreport::{chunks, chunks_serde, report_json},
     test_utils::test_report::{TestReport, TestReportBuilder},
 };
 use criterion::{criterion_group, criterion_main, Criterion};
@@ -58,7 +58,7 @@ fn simple_chunks(c: &mut Criterion) {
     let chunks = &[
         // Header and one chunk with an empty line
         "{}\n<<<<< end_of_header >>>>>\n{}\n",
-        // No header, one chunk with a populated line and an empty line 
+        // No header, one chunk with a populated line and an empty line
         "{}\n[1, null, [[0, 1]]]\n",
         // No header, two chunks, the second having just one empty line
         "{}\n[1, null, [[0, 1]]]\n\n<<<<< end_of_chunk >>>>>\n{}\n",
@@ -116,3 +116,63 @@ fn parse_chunks_file(input: &str, files: HashMap<usize, i64>, sessions: HashMap<
         .parse_next(&mut chunks_stream)
         .unwrap();
 }
+
+#[divan::bench]
+fn simple_chunks_serde() {
+    let chunks: &[&[u8]] = &[
+        // Header and one chunk with an empty line
+        b"{}\n<<<<< end_of_header >>>>>\n{}\n",
+        // No header, one chunk with a populated line and an empty line
+        b"{}\n[1, null, [[0, 1]]]\n",
+        // No header, two chunks, the second having just one empty line
+        b"{}\n[1, null, [[0, 1]]]\n\n<<<<< end_of_chunk >>>>>\n{}\n",
+        // Header, two chunks, the second having multiple data lines and an empty line
+        b"{}\n<<<<< end_of_header >>>>>\n{}\n[1, null, [[0, 1]]]\n\n<<<<< end_of_chunk >>>>>\n{}\n[1, null, [[0, 1]]]\n[1, null, [[0, 1]]]\n",
+    ];
+
+    for input in chunks {
+        parse_chunks_file_serde(input)
+    }
+}
+
+// this is currently <300 ms on my machine
+#[divan::bench(sample_count = 10)]
+fn complex_chunks_serde(bencher: Bencher) {
+    // this is a ~96M `chunks` file
+    let chunks =
+        load_fixture("pyreport/large/worker-c71ddfd4cb1753c7a540e5248c2beaa079fc3341-chunks.txt");
+
+    bencher.bench(|| parse_chunks_file_serde(&chunks));
+}
+
+fn parse_chunks_file_serde(input: &[u8]) {
+    let mut parser = chunks_serde::Parser::new(input);
+    loop {
+        // TODO: these are just for debugging
+        let rest = parser.rest;
+        let expecting = parser.expecting;
+        let event = parser.next();
+        match event {
+            Ok(None) => break,
+            Ok(Some(_)) => {}
+            Err(err) => {
+                let rest = std::str::from_utf8(rest).unwrap();
+                let rest = rest.get(..32).unwrap_or(rest);
+                dbg!(rest, expecting);
+                panic!("{err}");
+            }
+        }
+    }
+}
+
+#[track_caller]
+fn load_fixture(path: &str) -> Vec<u8> {
+    let path = format!("./fixtures/{path}");
+    let contents =
std::fs::read(path).unwrap(); + + if contents.starts_with(b"version https://git-lfs.github.com/spec/v1") { + panic!("Fixture has not been pulled from Git LFS"); + } + + contents +} diff --git a/core/src/parsers/pyreport/chunks_serde.rs b/core/src/parsers/pyreport/chunks_serde.rs new file mode 100644 index 0000000..9335d8c --- /dev/null +++ b/core/src/parsers/pyreport/chunks_serde.rs @@ -0,0 +1,375 @@ +//! A parser for the `chunks` file format. +//! +//! A chunks file contains an optional header and a series of 1 or more +//! "chunks", separated by an `END_OF_CHUNK` terminator. +//! +//! Chunks files sometimes begin with a JSON object followed by an +//! `END_OF_HEADER` terminator string. +//! The JSON object contains: +//! - `"labels_index"`: assigns a numeric ID to each label to save space +//! +//! If the `"labels_index"` key is present, this parser will insert each label +//! into the report as a [`crate::report::models::Context`] and create a mapping +//! in `buf.state.labels_index` from numeric ID in the header to the +//! new `Context`'s ID in the output report. If the `"labels_index"` key is +//! _not_ present, we will populate `buf.state.labels_index` gradually as we +//! encounter new labels during parsing. +//! +//! A chunk contains all of the line-by-line measurements for +//! a file. The Nth chunk corresponds to the file whose entry in +//! `buf.state.report_json_files` has N in its `chunks_index` field. +//! +//! Each new chunk will reset `buf.state.chunk.current_line` to 0 when it starts +//! and increment `buf.state.chunk.index` when it ends so that the next chunk +//! can associate its data with the correct file. +//! +//! A line may be empty, or it may contain a [`LineRecord`]. +//! A [`LineRecord`] itself does not correspond to anything in the output, +//! but it's an umbrella that includes all of the data +//! tied to a line/[`CoverageSample`]. +//! +//! This parser performs all the writes it can to the output +//! stream and only returns a `ReportLine` for tests. The `report_line_or_empty` +//! parser which wraps this and supports empty lines returns `Ok(())`. 
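+//!
+//! As a rough sketch (assuming the `Parser` and `ParserEvent` types defined
+//! below), the event-based interface is driven like this:
+//!
+//! ```no_run
+//! use codecov_rs::parsers::pyreport::chunks_serde::{Parser, ParserEvent};
+//!
+//! let input = b"{}\n<<<<< end_of_header >>>>>\n{}\n[1, null, [[0, 1]]]\n";
+//! let mut parser = Parser::new(input);
+//! // `Ok(None)` signals the end of input; any other `Ok` is a single event.
+//! while let Some(event) = parser.next().unwrap() {
+//!     match event {
+//!         ParserEvent::LineRecord(_record) => { /* one populated line */ }
+//!         _ => { /* headers, empty lines, empty chunks */ }
+//!     }
+//! }
+//! ```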
+
+use std::{collections::HashMap, fmt};
+
+use serde::{de, de::IgnoredAny, Deserialize};
+
+#[derive(Debug)]
+pub struct Parser<'d> {
+    // TODO: these are pub just for debugging
+    pub rest: &'d [u8],
+    pub expecting: Expecting,
+}
+
+#[derive(Debug, PartialEq, Eq)]
+pub enum ParserEvent {
+    EmptyLineRecord,
+    LineRecord(LineRecord),
+    EmptyChunk,
+    FileHeader(FileHeader),
+    ChunkHeader(ChunkHeader),
+}
+
+#[derive(Debug, PartialEq, Eq, Default, Deserialize)]
+pub struct FileHeader {
+    #[serde(default)]
+    pub labels_index: HashMap<String, String>,
+}
+
+#[derive(Debug, PartialEq, Eq, Default, Deserialize)]
+pub struct ChunkHeader {
+    #[serde(default)]
+    pub present_sessions: Vec<u32>,
+}
+
+#[derive(Debug, Clone, Deserialize)]
+struct IgnoredAnyEq(IgnoredAny);
+impl PartialEq for IgnoredAnyEq {
+    fn eq(&self, _other: &Self) -> bool {
+        true
+    }
+}
+impl Eq for IgnoredAnyEq {}
+
+#[derive(Debug, Clone, PartialEq, Eq, Deserialize)]
+pub struct LineRecord(
+    /// coverage
+    Coverage,
+    /// coverage type
+    Option<CoverageType>,
+    /// sessions
+    Vec<LineSession>,
+    /// messages
+    #[serde(default)]
+    Option<IgnoredAnyEq>,
+    /// complexity
+    #[serde(default)]
+    Option<IgnoredAnyEq>,
+    /// TODO: datapoints
+    #[serde(default)]
+    Option<IgnoredAnyEq>,
+);
+
+#[derive(Debug, Clone, PartialEq, Eq, Deserialize)]
+pub struct LineSession(
+    /// session id
+    u32,
+    /// coverage
+    Coverage,
+    /// TODO: branches
+    #[serde(default)]
+    Option<IgnoredAnyEq>,
+    /// TODO: partials
+    #[serde(default)]
+    Option<IgnoredAnyEq>,
+    /// TODO: complexity
+    #[serde(default)]
+    Option<IgnoredAnyEq>,
+);
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize)]
+#[serde(try_from = "&str")]
+pub enum CoverageType {
+    #[default]
+    Line,
+    Branch,
+    Method,
+}
+
+impl<'s> TryFrom<&'s str> for CoverageType {
+    type Error = &'s str;
+
+    fn try_from(value: &'s str) -> Result<Self, Self::Error> {
+        match value {
+            "line" => Ok(Self::Line),
+            "b" | "branch" => Ok(Self::Branch),
+            "m" | "method" => Ok(Self::Method),
+            s => Err(s),
+        }
+    }
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum Coverage {
+    Partial,
+    BranchTaken(u32, u32),
+    HitCount(u32),
+}
+
+impl<'de> Deserialize<'de> for Coverage {
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: de::Deserializer<'de>,
+    {
+        struct CoverageVisitor;
+        impl<'de> de::Visitor<'de> for CoverageVisitor {
+            type Value = Coverage;
+
+            fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+                formatter.write_str("a coverage value")
+            }
+
+            fn visit_bool<E>(self, v: bool) -> Result<Self::Value, E>
+            where
+                E: de::Error,
+            {
+                if v {
+                    Ok(Coverage::Partial)
+                } else {
+                    Err(de::Error::invalid_value(de::Unexpected::Bool(v), &self))
+                }
+            }
+
+            fn visit_u64<E>(self, value: u64) -> Result<Self::Value, E>
+            where
+                E: de::Error,
+            {
+                Ok(Coverage::HitCount(value as u32))
+            }
+
+            fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
+            where
+                E: de::Error,
+            {
+                let invalid = || de::Error::invalid_value(de::Unexpected::Str(v), &self);
+                let (covered, total) = v.split_once('/').ok_or_else(invalid)?;
+
+                let covered: u32 = covered.parse().map_err(|_| invalid())?;
+                let total: u32 = total.parse().map_err(|_| invalid())?;
+                Ok(Coverage::BranchTaken(covered, total))
+            }
+        }
+
+        deserializer.deserialize_any(CoverageVisitor)
+    }
+}
+
+#[derive(Debug, thiserror::Error)]
+pub enum ParserError {
+    #[error("unexpected EOF")]
+    UnexpectedEof,
+    #[error("unexpected input")]
+    UnexpectedInput,
+    #[error("invalid file header")]
+    InvalidFileHeader(#[source] serde_json::Error),
+    #[error("invalid chunk header")]
+    InvalidChunkHeader(#[source] serde_json::Error),
+    #[error("invalid line record")]
+    InvalidLineRecord(#[source] serde_json::Error),
+}
+
+impl PartialEq for
ParserError {
+    fn eq(&self, other: &Self) -> bool {
+        core::mem::discriminant(self) == core::mem::discriminant(other)
+    }
+}
+impl Eq for ParserError {}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum Expecting {
+    FileHeader,
+    ChunkHeader,
+    LineRecord,
+    EndOfChunk,
+}
+
+const END_OF_CHUNK: &[u8] = b"<<<<< end_of_chunk >>>>>";
+const END_OF_HEADER: &[u8] = b"<<<<< end_of_header >>>>>";
+
+// `slice::split_once` is still unstable:
+//
+fn slice_split_once(slice: &[u8], pred: u8) -> Option<(&[u8], &[u8])> {
+    let index = slice.iter().position(|b| *b == pred)?;
+    Some((&slice[..index], &slice[index + 1..]))
+}
+
+impl<'d> Parser<'d> {
+    pub fn new(input: &'d [u8]) -> Self {
+        Self {
+            rest: input,
+            expecting: Expecting::FileHeader,
+        }
+    }
+
+    pub fn next(&mut self) -> Result<Option<ParserEvent>, ParserError> {
+        loop {
+            let Some((line, rest)) = slice_split_once(self.rest, b'\n') else {
+                return Ok(None);
+            };
+            self.rest = rest;
+
+            if self.expecting == Expecting::LineRecord {
+                if line.is_empty() {
+                    return Ok(Some(ParserEvent::EmptyLineRecord));
+                }
+                if line == END_OF_CHUNK {
+                    self.expecting = Expecting::ChunkHeader;
+                    continue;
+                }
+
+                let line_record: LineRecord =
+                    serde_json::from_slice(line).map_err(ParserError::InvalidLineRecord)?;
+                return Ok(Some(ParserEvent::LineRecord(line_record)));
+            }
+
+            if self.expecting == Expecting::EndOfChunk {
+                if line != END_OF_CHUNK {
+                    return Err(ParserError::UnexpectedInput);
+                }
+
+                self.expecting = Expecting::ChunkHeader;
+                continue;
+            }
+
+            // else: expecting a file or chunk header
+
+            // this is an empty chunk (header)
+            if line == b"null" {
+                self.expecting = Expecting::EndOfChunk;
+
+                return Ok(Some(ParserEvent::EmptyChunk));
+            }
+
+            // otherwise, the header has to be a JSON object
+            if !line.starts_with(b"{") {
+                return Err(ParserError::UnexpectedInput);
+            }
+            if self.expecting == Expecting::FileHeader {
+                if let Some((next_line, rest)) = slice_split_once(self.rest, b'\n') {
+                    if next_line == END_OF_HEADER {
+                        self.rest = rest;
+                        self.expecting = Expecting::ChunkHeader;
+
+                        let file_header: FileHeader =
+                            serde_json::from_slice(line).map_err(ParserError::InvalidFileHeader)?;
+                        return Ok(Some(ParserEvent::FileHeader(file_header)));
+                    }
+                }
+            }
+            // else: chunk header
+
+            self.expecting = Expecting::LineRecord;
+
+            let chunk_header: ChunkHeader =
+                serde_json::from_slice(line).map_err(ParserError::InvalidChunkHeader)?;
+            return Ok(Some(ParserEvent::ChunkHeader(chunk_header)));
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    type ParserItem = Result<Option<ParserEvent>, ParserError>;
+
+    #[test]
+    fn test_parsing_events() {
+        let simple_line_record = LineRecord(
+            Coverage::HitCount(1),
+            None,
+            vec![LineSession(0, Coverage::HitCount(1), None, None, None)],
+            None,
+            None,
+            None,
+        );
+
+        let cases: &[(&[u8], &[ParserItem])] = &[
+            (
+                // Header and one chunk with an empty line
+                b"{}\n<<<<< end_of_header >>>>>\n{}\n",
+                &[
+                    Ok(Some(ParserEvent::FileHeader(FileHeader::default()))),
+                    Ok(Some(ParserEvent::ChunkHeader(ChunkHeader::default()))),
+                    Ok(None),
+                ],
+            ),
+            (
+                // No header, one chunk with a populated line and an empty line
+                b"{}\n[1, null, [[0, 1]]]\n",
+                &[
+                    Ok(Some(ParserEvent::ChunkHeader(ChunkHeader::default()))),
+                    Ok(Some(ParserEvent::LineRecord(simple_line_record.clone()))),
+                    Ok(None),
+                ],
+            ),
+            (
+                // No header, two chunks, the second having just one empty line
+                b"{}\n[1, null, [[0, 1]]]\n\n<<<<< end_of_chunk >>>>>\n{}\n",
+                &[
+                    Ok(Some(ParserEvent::ChunkHeader(ChunkHeader::default()))),
+                    
Ok(Some(ParserEvent::LineRecord(simple_line_record.clone()))), + Ok(Some(ParserEvent::EmptyLineRecord)), + Ok(Some(ParserEvent::ChunkHeader(ChunkHeader::default()))), + Ok(None), + ], + ), + ( + // Header, two chunks, the second having multiple data lines and an empty line + b"{}\n<<<<< end_of_header >>>>>\n{}\n[1, null, [[0, 1]]]\n\n<<<<< end_of_chunk >>>>>\n{}\n[1, null, [[0, 1]]]\n[1, null, [[0, 1]]]\n", + &[ + Ok(Some(ParserEvent::FileHeader(FileHeader::default()))), + Ok(Some(ParserEvent::ChunkHeader(ChunkHeader::default()))), + Ok(Some(ParserEvent::LineRecord(simple_line_record.clone()))), + Ok(Some(ParserEvent::EmptyLineRecord)), + Ok(Some(ParserEvent::ChunkHeader(ChunkHeader::default()))), + Ok(Some(ParserEvent::LineRecord(simple_line_record.clone()))), + Ok(Some(ParserEvent::LineRecord(simple_line_record.clone()))), + Ok(None), + ], + ), + ]; + + for (input, expected_events) in cases { + let mut parser = Parser::new(input); + + for expected_event in *expected_events { + dbg!(std::str::from_utf8(parser.rest).unwrap(), parser.expecting); + let event = parser.next(); + assert_eq!(dbg!(event), *expected_event); + } + } + } +} diff --git a/core/src/parsers/pyreport/mod.rs b/core/src/parsers/pyreport/mod.rs index 4a4b1f2..6d9ff6a 100644 --- a/core/src/parsers/pyreport/mod.rs +++ b/core/src/parsers/pyreport/mod.rs @@ -8,9 +8,9 @@ use crate::{ report::{SqliteReport, SqliteReportBuilder, SqliteReportBuilderTx}, }; -pub mod report_json; - pub mod chunks; +pub mod chunks_serde; +pub mod report_json; mod utils; From e47f552f5eae7063147c1b4857f61972aad03666 Mon Sep 17 00:00:00 2001 From: Arpad Borsos Date: Wed, 4 Sep 2024 11:57:09 +0200 Subject: [PATCH 2/9] Use `memchr`-based splitting instead of an iterator/event-based interface --- Cargo.lock | 1 + core/Cargo.toml | 1 + core/benches/pyreport.rs | 22 +- core/src/parsers/pyreport/chunks_serde.rs | 330 +++++++++++----------- 4 files changed, 179 insertions(+), 175 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 17912c4..38be938 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -138,6 +138,7 @@ version = "0.1.0" dependencies = [ "codspeed-criterion-compat", "include_dir", + "memchr", "memmap2", "rand", "rusqlite", diff --git a/core/Cargo.toml b/core/Cargo.toml index 9aa0a6f..a422907 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -10,6 +10,7 @@ testing = [] [dependencies] include_dir = "0.7.3" +memchr = "2.7.4" memmap2 = "0.9.5" rand = "0.8.5" rusqlite = { version = "0.31.0", features = [ diff --git a/core/benches/pyreport.rs b/core/benches/pyreport.rs index 70b6759..9eaa62a 100644 --- a/core/benches/pyreport.rs +++ b/core/benches/pyreport.rs @@ -1,4 +1,4 @@ -use std::collections::HashMap; +use std::{collections::HashMap, hint::black_box}; use codecov_rs::{ parsers::pyreport::{chunks, chunks_serde, report_json}, @@ -146,21 +146,11 @@ fn complex_chunks_serde(bencher: Bencher) { } fn parse_chunks_file_serde(input: &[u8]) { - let mut parser = chunks_serde::Parser::new(input); - loop { - // TODO: these are just for debugging - let rest = parser.rest; - let expecting = parser.expecting; - let event = parser.next(); - match event { - Ok(None) => break, - Ok(Some(_)) => {} - Err(err) => { - let rest = std::str::from_utf8(rest).unwrap(); - let rest = rest.get(..32).unwrap_or(rest); - dbg!(rest, expecting); - panic!("{err}"); - } + let chunks_file = chunks_serde::ChunksFile::new(input).unwrap(); + let mut chunks = chunks_file.chunks(); + while let Some(mut chunk) = chunks.next_chunk().unwrap() { + while let Some(line) = chunk.next_line().unwrap() 
{ + black_box(line); } } } diff --git a/core/src/parsers/pyreport/chunks_serde.rs b/core/src/parsers/pyreport/chunks_serde.rs index 9335d8c..121057b 100644 --- a/core/src/parsers/pyreport/chunks_serde.rs +++ b/core/src/parsers/pyreport/chunks_serde.rs @@ -32,24 +32,146 @@ //! stream and only returns a `ReportLine` for tests. The `report_line_or_empty` //! parser which wraps this and supports empty lines returns `Ok(())`. -use std::{collections::HashMap, fmt}; +use std::{collections::HashMap, fmt, mem, sync::OnceLock}; +use memchr::{memchr, memmem}; use serde::{de, de::IgnoredAny, Deserialize}; +use crate::report::pyreport::{CHUNKS_FILE_END_OF_CHUNK, CHUNKS_FILE_HEADER_TERMINATOR}; + +#[derive(Debug, thiserror::Error)] +pub enum ParserError { + #[error("unexpected EOF")] + UnexpectedEof, + #[error("unexpected input")] + UnexpectedInput, + #[error("invalid file header")] + InvalidFileHeader(#[source] serde_json::Error), + #[error("invalid chunk header")] + InvalidChunkHeader(#[source] serde_json::Error), + #[error("invalid line record")] + InvalidLineRecord(#[source] serde_json::Error), +} + +impl PartialEq for ParserError { + fn eq(&self, other: &Self) -> bool { + core::mem::discriminant(self) == core::mem::discriminant(other) + } +} +impl Eq for ParserError {} + #[derive(Debug)] -pub struct Parser<'d> { - // TODO: these are pub just for debugging - pub rest: &'d [u8], - pub expecting: Expecting, +pub struct ChunksFile<'d> { + file_header: FileHeader, + input: &'d [u8], +} + +impl<'d> ChunksFile<'d> { + pub fn new(mut input: &'d [u8]) -> Result { + static HEADER_FINDER: OnceLock = OnceLock::new(); + let header_finder = + HEADER_FINDER.get_or_init(|| memmem::Finder::new(CHUNKS_FILE_HEADER_TERMINATOR)); + + let file_header = if let Some(pos) = header_finder.find(input) { + let header_bytes = &input[..pos]; + input = &input[pos + header_finder.needle().len()..]; + let file_header: FileHeader = + serde_json::from_slice(header_bytes).map_err(ParserError::InvalidFileHeader)?; + file_header + } else { + FileHeader::default() + }; + + Ok(Self { file_header, input }) + } + + pub fn labels_index(&self) -> &HashMap { + &self.file_header.labels_index + } + + pub fn chunks(&self) -> Chunks { + Chunks { input: self.input } + } } -#[derive(Debug, PartialEq, Eq)] -pub enum ParserEvent { - EmptyLineRecord, - LineRecord(LineRecord), - EmptyChunk, - FileHeader(FileHeader), - ChunkHeader(ChunkHeader), +pub struct Chunks<'d> { + input: &'d [u8], +} + +impl<'d> Chunks<'d> { + pub fn next_chunk(&mut self) -> Result>, ParserError> { + if self.input.is_empty() { + return Ok(None); + } + + static CHUNK_FINDER: OnceLock = OnceLock::new(); + let chunk_finder = + CHUNK_FINDER.get_or_init(|| memmem::Finder::new(CHUNKS_FILE_END_OF_CHUNK)); + + let mut chunk_bytes = if let Some(pos) = chunk_finder.find(self.input) { + let chunk_bytes = &self.input[..pos]; + self.input = &self.input[pos + chunk_finder.needle().len()..]; + chunk_bytes + } else { + mem::take(&mut self.input) + }; + + if chunk_bytes == b"null" { + return Ok(Some(Chunk { + chunk_header: ChunkHeader::default(), + input: &[], + })); + } + + let header_bytes = next_line(&mut chunk_bytes).ok_or(ParserError::UnexpectedInput)?; + let chunk_header: ChunkHeader = + serde_json::from_slice(header_bytes).map_err(ParserError::InvalidFileHeader)?; + + Ok(Some(Chunk { + chunk_header, + input: chunk_bytes, + })) + } +} + +pub struct Chunk<'d> { + chunk_header: ChunkHeader, + input: &'d [u8], +} + +impl<'d> Chunk<'d> { + pub fn present_sessions(&self) -> &[u32] { + 
&self.chunk_header.present_sessions + } + + pub fn next_line(&mut self) -> Result>, ParserError> { + let Some(line) = next_line(&mut self.input) else { + return Ok(None); + }; + + if line.is_empty() { + return Ok(Some(None)); + } + + let line_record: LineRecord = + serde_json::from_slice(line).map_err(ParserError::InvalidLineRecord)?; + return Ok(Some(Some(line_record))); + } +} + +fn next_line<'d>(input: &mut &'d [u8]) -> Option<&'d [u8]> { + if input.is_empty() { + return None; + } + + let line_bytes = if let Some(pos) = memchr(b'\n', input) { + let line_bytes = &input[..pos]; + *input = &input[pos + 1..]; + line_bytes + } else { + mem::take(input) + }; + Some(line_bytes) } #[derive(Debug, PartialEq, Eq, Default, Deserialize)] @@ -186,125 +308,10 @@ impl<'de> Deserialize<'de> for Coverage { } } -#[derive(Debug, thiserror::Error)] -pub enum ParserError { - #[error("unexpected EOF")] - UnexpectedEof, - #[error("unexpected input")] - UnexpectedInput, - #[error("invalid file header")] - InvalidFileHeader(#[source] serde_json::Error), - #[error("invalid chunk header")] - InvalidChunkHeader(#[source] serde_json::Error), - #[error("invalid line record")] - InvalidLineRecord(#[source] serde_json::Error), -} - -impl PartialEq for ParserError { - fn eq(&self, other: &Self) -> bool { - core::mem::discriminant(self) == core::mem::discriminant(other) - } -} -impl Eq for ParserError {} - -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum Expecting { - FileHeader, - ChunkHeader, - LineRecord, - EndOfChunk, -} - -const END_OF_CHUNK: &[u8] = b"<<<<< end_of_chunk >>>>>"; -const END_OF_HEADER: &[u8] = b"<<<<< end_of_header >>>>>"; - -// `slice::split_once` is still unstable: -// -fn slice_split_once(slice: &[u8], pred: u8) -> Option<(&[u8], &[u8])> { - let index = slice.iter().position(|b| *b == pred)?; - Some((&slice[..index], &slice[index + 1..])) -} - -impl<'d> Parser<'d> { - pub fn new(input: &'d [u8]) -> Self { - Self { - rest: input, - expecting: Expecting::FileHeader, - } - } - - pub fn next(&mut self) -> Result, ParserError> { - loop { - let Some((line, rest)) = slice_split_once(self.rest, b'\n') else { - return Ok(None); - }; - self.rest = rest; - - if self.expecting == Expecting::LineRecord { - if line.is_empty() { - return Ok(Some(ParserEvent::EmptyLineRecord)); - } - if line == END_OF_CHUNK { - self.expecting = Expecting::ChunkHeader; - continue; - } - - let line_record: LineRecord = - serde_json::from_slice(line).map_err(ParserError::InvalidLineRecord)?; - return Ok(Some(ParserEvent::LineRecord(line_record))); - } - - if self.expecting == Expecting::EndOfChunk { - if line != END_OF_CHUNK { - return Err(ParserError::UnexpectedInput); - } - - self.expecting = Expecting::ChunkHeader; - continue; - } - - // else: expecting a file or chunk header - - // this is an empty chunk (header) - if line == b"null" { - self.expecting = Expecting::EndOfChunk; - - return Ok(Some(ParserEvent::EmptyChunk)); - } - - // otherwise, the header has to be a JSON object - if !line.starts_with(b"{") { - return Err(ParserError::UnexpectedInput); - } - if self.expecting == Expecting::FileHeader { - if let Some((next_line, rest)) = slice_split_once(self.rest, b'\n') { - if next_line == END_OF_HEADER { - self.rest = rest; - self.expecting = Expecting::ChunkHeader; - - let file_header: FileHeader = - serde_json::from_slice(line).map_err(ParserError::InvalidFileHeader)?; - return Ok(Some(ParserEvent::FileHeader(file_header))); - } - } - } - // else: chunk header - - self.expecting = Expecting::LineRecord; - - let 
chunk_header: ChunkHeader = - serde_json::from_slice(line).map_err(ParserError::InvalidChunkHeader)?; - return Ok(Some(ParserEvent::ChunkHeader(chunk_header))); - } - } -} - #[cfg(test)] mod tests { use super::*; - type ParserItem = Result, ParserError>; - #[test] fn test_parsing_events() { let simple_line_record = LineRecord( @@ -316,60 +323,65 @@ mod tests { None, ); - let cases: &[(&[u8], &[ParserItem])] = &[ + let cases: &[( + &[u8], // input + HashMap, // labels index + &[(&[u32], &[Option])], // chunks: session ids, line records + )] = &[ ( // Header and one chunk with an empty line b"{}\n<<<<< end_of_header >>>>>\n{}\n", - &[ - Ok(Some(ParserEvent::FileHeader(FileHeader::default()))), - Ok(Some(ParserEvent::ChunkHeader(ChunkHeader::default()))), - Ok(None), - ], + HashMap::default(), + &[(&[], &[])], ), ( // No header, one chunk with a populated line and an empty line b"{}\n[1, null, [[0, 1]]]\n", - &[ - Ok(Some(ParserEvent::ChunkHeader(ChunkHeader::default()))), - Ok(Some(ParserEvent::LineRecord(simple_line_record.clone()))), - Ok(None), - ], + HashMap::default(), + &[(&[], &[Some(simple_line_record.clone())])], ), ( // No header, two chunks, the second having just one empty line b"{}\n[1, null, [[0, 1]]]\n\n<<<<< end_of_chunk >>>>>\n{}\n", - &[ - Ok(Some(ParserEvent::ChunkHeader(ChunkHeader::default()))), - Ok(Some(ParserEvent::LineRecord(simple_line_record.clone()))), - Ok(Some(ParserEvent::EmptyLineRecord)), - Ok(Some(ParserEvent::ChunkHeader(ChunkHeader::default()))), - Ok(None), - ], + HashMap::default(), + &[(&[], &[Some(simple_line_record.clone())]), (&[], &[])], ), ( // Header, two chunks, the second having multiple data lines and an empty line b"{}\n<<<<< end_of_header >>>>>\n{}\n[1, null, [[0, 1]]]\n\n<<<<< end_of_chunk >>>>>\n{}\n[1, null, [[0, 1]]]\n[1, null, [[0, 1]]]\n", + HashMap::default(), &[ - Ok(Some(ParserEvent::FileHeader(FileHeader::default()))), - Ok(Some(ParserEvent::ChunkHeader(ChunkHeader::default()))), - Ok(Some(ParserEvent::LineRecord(simple_line_record.clone()))), - Ok(Some(ParserEvent::EmptyLineRecord)), - Ok(Some(ParserEvent::ChunkHeader(ChunkHeader::default()))), - Ok(Some(ParserEvent::LineRecord(simple_line_record.clone()))), - Ok(Some(ParserEvent::LineRecord(simple_line_record.clone()))), - Ok(None), + (&[], &[Some(simple_line_record.clone())]), + ( + &[], + &[ + Some(simple_line_record.clone()), + Some(simple_line_record.clone()), + ], + ), ], ), ]; - for (input, expected_events) in cases { - let mut parser = Parser::new(input); + for (input, expected_labels_index, expected_chunks) in cases { + let chunks_file = ChunksFile::new(input).unwrap(); + let mut chunks = chunks_file.chunks(); + + assert_eq!(chunks_file.labels_index(), expected_labels_index); + + for (expected_sessions, expected_line_records) in *expected_chunks { + let mut chunk = chunks.next_chunk().unwrap().unwrap(); + + assert_eq!(chunk.present_sessions(), *expected_sessions); + + let mut lines = vec![]; + while let Some(line) = chunk.next_line().unwrap() { + lines.push(line); + } - for expected_event in *expected_events { - dbg!(std::str::from_utf8(parser.rest).unwrap(), parser.expecting); - let event = parser.next(); - assert_eq!(dbg!(event), *expected_event); + assert_eq!(lines, *expected_line_records); } + assert!(chunks.next_chunk().unwrap().is_none()); } } } From 6f8af3896e61a2bc5be6ab1023f50dfedb28f6a2 Mon Sep 17 00:00:00 2001 From: Arpad Borsos Date: Wed, 4 Sep 2024 13:03:38 +0200 Subject: [PATCH 3/9] get closer to the existing parser interface dealing with report builders --- 
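
A rough sketch of the call shape this patch introduces (names as in the
benchmark changes below); the serde-based parser is now driven through a
single entry point that takes the parsed `report_json` and a report
builder:

    let report_json = parse_report_json(&report);
    let mut report_builder = TestReportBuilder::default();
    chunks_serde::parse_chunks_file(input, &report_json, &mut report_builder).unwrap();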
 core/benches/pyreport.rs                  |  28 +++---
 core/src/error.rs                         |   5 +
 core/src/parsers/pyreport/chunks_serde.rs | 115 +++++++++++++++++++---
 core/src/report/pyreport/types.rs         |  17 ++++
 4 files changed, 140 insertions(+), 25 deletions(-)

diff --git a/core/benches/pyreport.rs b/core/benches/pyreport.rs
index 9eaa62a..21621c7 100644
--- a/core/benches/pyreport.rs
+++ b/core/benches/pyreport.rs
@@ -1,4 +1,4 @@
-use std::{collections::HashMap, hint::black_box};
+use std::collections::HashMap;
 
 use codecov_rs::{
     parsers::pyreport::{chunks, chunks_serde, report_json},
@@ -130,8 +130,13 @@ fn simple_chunks_serde() {
         b"{}\n<<<<< end_of_header >>>>>\n{}\n[1, null, [[0, 1]]]\n\n<<<<< end_of_chunk >>>>>\n{}\n[1, null, [[0, 1]]]\n[1, null, [[0, 1]]]\n",
     ];
 
+    let report_json = report_json::ParsedReportJson {
+        files: Default::default(),
+        sessions: Default::default(),
+    };
+
     for input in chunks {
-        parse_chunks_file_serde(input)
+        parse_chunks_file_serde(input, &report_json);
     }
 }
 
@@ -142,17 +147,18 @@ fn complex_chunks_serde(bencher: Bencher) {
     let chunks =
         load_fixture("pyreport/large/worker-c71ddfd4cb1753c7a540e5248c2beaa079fc3341-chunks.txt");
 
-    bencher.bench(|| parse_chunks_file_serde(&chunks));
+    // parsing the chunks depends on having loaded the `report_json`
+    let report = load_fixture(
+        "pyreport/large/worker-c71ddfd4cb1753c7a540e5248c2beaa079fc3341-report_json.json",
+    );
+    let report_json = parse_report_json(&report);
+
+    bencher.bench(|| parse_chunks_file_serde(&chunks, &report_json));
 }
 
-fn parse_chunks_file_serde(input: &[u8]) {
-    let chunks_file = chunks_serde::ChunksFile::new(input).unwrap();
-    let mut chunks = chunks_file.chunks();
-    while let Some(mut chunk) = chunks.next_chunk().unwrap() {
-        while let Some(line) = chunk.next_line().unwrap() {
-            black_box(line);
-        }
-    }
+fn parse_chunks_file_serde(input: &[u8], report_json: &report_json::ParsedReportJson) {
+    let mut report_builder = TestReportBuilder::default();
+    chunks_serde::parse_chunks_file(input, report_json, &mut report_builder).unwrap();
 }
 
 #[track_caller]
diff --git a/core/src/error.rs b/core/src/error.rs
index 5f793bb..b3d0563 100644
--- a/core/src/error.rs
+++ b/core/src/error.rs
@@ -1,5 +1,7 @@
 use thiserror::Error;
 
+use crate::parsers::pyreport::chunks_serde::ChunksFileParseError;
+
 pub type Result<T, E = CodecovError> = std::result::Result<T, E>;
 
 #[derive(Error, Debug)]
@@ -26,4 +28,7 @@ pub enum CodecovError {
     #[cfg(feature = "pyreport")]
     #[error("failed to convert sqlite to pyreport: '{0}'")]
     PyreportConversionError(String),
+
+    #[error(transparent)]
+    ChunksFileParseError(#[from] ChunksFileParseError),
 }
diff --git a/core/src/parsers/pyreport/chunks_serde.rs b/core/src/parsers/pyreport/chunks_serde.rs
index 121057b..9372e7b 100644
--- a/core/src/parsers/pyreport/chunks_serde.rs
+++ b/core/src/parsers/pyreport/chunks_serde.rs
@@ -37,10 +37,84 @@ use std::{collections::HashMap, fmt, mem, sync::OnceLock};
 use memchr::{memchr, memmem};
 use serde::{de, de::IgnoredAny, Deserialize};
 
-use crate::report::pyreport::{CHUNKS_FILE_END_OF_CHUNK, CHUNKS_FILE_HEADER_TERMINATOR};
+use super::report_json::ParsedReportJson;
+use crate::{
+    error::CodecovError,
+    report::{
+        models,
+        pyreport::{
+            types::{self, PyreportCoverage, ReportLine},
+            CHUNKS_FILE_END_OF_CHUNK, CHUNKS_FILE_HEADER_TERMINATOR,
+        },
+        Report, ReportBuilder,
+    },
+};
+
+pub fn parse_chunks_file<B, R>(
+    input: &[u8],
+    _report_json: &ParsedReportJson,
+    builder: &mut B,
+) -> Result<(), CodecovError>
+where
+    B: ReportBuilder<R>,
+    R: Report,
+{
+    let chunks_file = ChunksFile::new(input)?;
+
+    let mut
labels_index = HashMap::with_capacity(chunks_file.labels_index().len()); + for (index, name) in chunks_file.labels_index() { + let context = builder.insert_context(name)?; + labels_index.insert(index.clone(), context.id); + } + + let mut report_lines = vec![]; + + let mut chunks = chunks_file.chunks(); + while let Some(mut chunk) = chunks.next_chunk()? { + let mut line_no = 0; + report_lines.clear(); + while let Some(line) = chunk.next_line()? { + line_no += 1; + if let Some(line) = line { + let coverage_type = match line.1.unwrap_or_default() { + CoverageType::Line => models::CoverageType::Line, + CoverageType::Branch => models::CoverageType::Branch, + CoverageType::Method => models::CoverageType::Method, + }; + let sessions = line + .2 + .into_iter() + .map(|session| types::LineSession { + session_id: session.0, + coverage: session.1.into(), + branches: None, // TODO + partials: None, // TODO + complexity: None, // TODO + }) + .collect(); + + let mut report_line = ReportLine { + line_no, + coverage: line.0.into(), + coverage_type, + sessions, + _messages: None, + _complexity: None, + datapoints: None, // TODO + }; + report_line.normalize(); + report_lines.push(report_line); + } + } + // TODO: + // utils::save_report_lines()?; + } + + Ok(()) +} #[derive(Debug, thiserror::Error)] -pub enum ParserError { +pub enum ChunksFileParseError { #[error("unexpected EOF")] UnexpectedEof, #[error("unexpected input")] @@ -53,12 +127,12 @@ pub enum ParserError { InvalidLineRecord(#[source] serde_json::Error), } -impl PartialEq for ParserError { +impl PartialEq for ChunksFileParseError { fn eq(&self, other: &Self) -> bool { core::mem::discriminant(self) == core::mem::discriminant(other) } } -impl Eq for ParserError {} +impl Eq for ChunksFileParseError {} #[derive(Debug)] pub struct ChunksFile<'d> { @@ -67,7 +141,7 @@ pub struct ChunksFile<'d> { } impl<'d> ChunksFile<'d> { - pub fn new(mut input: &'d [u8]) -> Result { + pub fn new(mut input: &'d [u8]) -> Result { static HEADER_FINDER: OnceLock = OnceLock::new(); let header_finder = HEADER_FINDER.get_or_init(|| memmem::Finder::new(CHUNKS_FILE_HEADER_TERMINATOR)); @@ -75,8 +149,8 @@ impl<'d> ChunksFile<'d> { let file_header = if let Some(pos) = header_finder.find(input) { let header_bytes = &input[..pos]; input = &input[pos + header_finder.needle().len()..]; - let file_header: FileHeader = - serde_json::from_slice(header_bytes).map_err(ParserError::InvalidFileHeader)?; + let file_header: FileHeader = serde_json::from_slice(header_bytes) + .map_err(ChunksFileParseError::InvalidFileHeader)?; file_header } else { FileHeader::default() @@ -99,7 +173,7 @@ pub struct Chunks<'d> { } impl<'d> Chunks<'d> { - pub fn next_chunk(&mut self) -> Result>, ParserError> { + pub fn next_chunk(&mut self) -> Result>, ChunksFileParseError> { if self.input.is_empty() { return Ok(None); } @@ -123,9 +197,10 @@ impl<'d> Chunks<'d> { })); } - let header_bytes = next_line(&mut chunk_bytes).ok_or(ParserError::UnexpectedInput)?; - let chunk_header: ChunkHeader = - serde_json::from_slice(header_bytes).map_err(ParserError::InvalidFileHeader)?; + let header_bytes = + next_line(&mut chunk_bytes).ok_or(ChunksFileParseError::UnexpectedInput)?; + let chunk_header: ChunkHeader = serde_json::from_slice(header_bytes) + .map_err(ChunksFileParseError::InvalidFileHeader)?; Ok(Some(Chunk { chunk_header, @@ -144,7 +219,7 @@ impl<'d> Chunk<'d> { &self.chunk_header.present_sessions } - pub fn next_line(&mut self) -> Result>, ParserError> { + pub fn next_line(&mut self) -> Result>, ChunksFileParseError> 
{ let Some(line) = next_line(&mut self.input) else { return Ok(None); }; @@ -154,7 +229,7 @@ impl<'d> Chunk<'d> { } let line_record: LineRecord = - serde_json::from_slice(line).map_err(ParserError::InvalidLineRecord)?; + serde_json::from_slice(line).map_err(ChunksFileParseError::InvalidLineRecord)?; return Ok(Some(Some(line_record))); } } @@ -217,7 +292,7 @@ pub struct LineRecord( #[derive(Debug, Clone, PartialEq, Eq, Deserialize)] pub struct LineSession( /// session id - u32, + usize, /// coverage Coverage, /// TODO: branches @@ -260,6 +335,18 @@ pub enum Coverage { HitCount(u32), } +impl Into for Coverage { + fn into(self) -> PyreportCoverage { + match self { + Coverage::Partial => PyreportCoverage::Partial(), + Coverage::BranchTaken(covered, total) => { + PyreportCoverage::BranchesTaken { covered, total } + } + Coverage::HitCount(hits) => PyreportCoverage::HitCount(hits), + } + } +} + impl<'de> Deserialize<'de> for Coverage { fn deserialize(deserializer: D) -> Result where diff --git a/core/src/report/pyreport/types.rs b/core/src/report/pyreport/types.rs index 2fc110d..0da5847 100644 --- a/core/src/report/pyreport/types.rs +++ b/core/src/report/pyreport/types.rs @@ -187,6 +187,23 @@ pub struct ReportLine { pub datapoints: Option>>, } +impl ReportLine { + pub fn normalize(&mut self) { + // Fix issues like recording branch coverage with `CoverageType::Method` + let (correct_coverage, correct_type) = + normalize_coverage_measurement(&self.coverage, &self.coverage_type); + self.coverage = correct_coverage; + self.coverage_type = correct_type; + + // Fix the `coverage` values in each `LineSession` as well + for line_session in &mut self.sessions { + let (correct_coverage, _) = + normalize_coverage_measurement(&line_session.coverage, &self.coverage_type); + line_session.coverage = correct_coverage; + } + } +} + /// Account for some quirks and malformed data. See code comments for details. pub(crate) fn normalize_coverage_measurement( coverage: &PyreportCoverage, From d6f4a47f6fc06c2503c245dc05f0de90fb5bd6e1 Mon Sep 17 00:00:00 2001 From: Arpad Borsos Date: Tue, 12 Nov 2024 13:11:11 +0100 Subject: [PATCH 4/9] Implement missing chunks parser features This should implement everything except for the `complexity` parser. 
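
The serde line records now map onto `types::LineSession` roughly like
this (a sketch of the shape; see the diff for the full mapping,
including the datapoints handling):

    let sessions = line
        .2
        .into_iter()
        .map(|session| types::LineSession {
            session_id: session.0,
            coverage: session.1,
            branches: session.2.into(),
            partials: session.3.into(),
            complexity: None, // TODO
        })
        .collect();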
--- core/benches/pyreport.rs | 92 ++------- core/src/parsers/pyreport/chunks_serde.rs | 226 +++++++++++++--------- core/src/parsers/pyreport/report_json.rs | 2 +- core/src/report/models.rs | 5 +- core/src/report/pyreport/types.rs | 10 +- test_utils/src/fixtures.rs | 8 +- 6 files changed, 161 insertions(+), 182 deletions(-) diff --git a/core/benches/pyreport.rs b/core/benches/pyreport.rs index 21621c7..fceffef 100644 --- a/core/benches/pyreport.rs +++ b/core/benches/pyreport.rs @@ -1,12 +1,11 @@ use std::collections::HashMap; use codecov_rs::{ - parsers::pyreport::{chunks, chunks_serde, report_json}, - test_utils::test_report::{TestReport, TestReportBuilder}, + parsers::pyreport::{chunks_serde, report_json}, + test_utils::test_report::TestReportBuilder, }; use criterion::{criterion_group, criterion_main, Criterion}; use test_utils::fixtures::{read_fixture, FixtureFormat::Pyreport, FixtureSize::Large}; -use winnow::Parser as _; criterion_group!( benches, @@ -55,24 +54,25 @@ fn parse_report_json(input: &[u8]) -> report_json::ParsedReportJson { } fn simple_chunks(c: &mut Criterion) { - let chunks = &[ + let chunks: &[&[u8]] = &[ // Header and one chunk with an empty line - "{}\n<<<<< end_of_header >>>>>\n{}\n", + b"{}\n<<<<< end_of_header >>>>>\n{}\n", // No header, one chunk with a populated line and an empty line - "{}\n[1, null, [[0, 1]]]\n", + b"{}\n[1, null, [[0, 1]]]\n", // No header, two chunks, the second having just one empty line - "{}\n[1, null, [[0, 1]]]\n\n<<<<< end_of_chunk >>>>>\n{}\n", + b"{}\n[1, null, [[0, 1]]]\n\n<<<<< end_of_chunk >>>>>\n{}\n", // Header, two chunks, the second having multiple data lines and an empty line - "{}\n<<<<< end_of_header >>>>>\n{}\n[1, null, [[0, 1]]]\n\n<<<<< end_of_chunk >>>>>\n{}\n[1, null, [[0, 1]]]\n[1, null, [[0, 1]]]\n", + b"{}\n<<<<< end_of_header >>>>>\n{}\n[1, null, [[0, 1]]]\n\n<<<<< end_of_chunk >>>>>\n{}\n[1, null, [[0, 1]]]\n[1, null, [[0, 1]]]\n", ]; let files = HashMap::from([(0, 0), (1, 1), (2, 2)]); let sessions = HashMap::from([(0, 0), (1, 1), (2, 2)]); + let report_json = report_json::ParsedReportJson { files, sessions }; c.bench_function("simple_chunks", |b| { b.iter(|| { for input in chunks { - parse_chunks_file(input, files.clone(), sessions.clone()) + parse_chunks_file_serde(input, report_json.clone()); } }) }); @@ -87,7 +87,6 @@ fn complex_chunks(c: &mut Criterion) { "worker-c71ddfd4cb1753c7a540e5248c2beaa079fc3341-chunks.txt", ) .unwrap(); - let chunks = std::str::from_utf8(&chunks).unwrap(); // parsing the chunks depends on having loaded the `report_json` let report = read_fixture( @@ -96,79 +95,14 @@ fn complex_chunks(c: &mut Criterion) { "worker-c71ddfd4cb1753c7a540e5248c2beaa079fc3341-report_json.json", ) .unwrap(); - let report_json::ParsedReportJson { files, sessions } = parse_report_json(&report); + let report_json = parse_report_json(&report); c.bench_function("complex_chunks", |b| { - b.iter(|| parse_chunks_file(chunks, files.clone(), sessions.clone())) + b.iter(|| parse_chunks_file_serde(&chunks, report_json.clone())) }); } -fn parse_chunks_file(input: &str, files: HashMap, sessions: HashMap) { +fn parse_chunks_file_serde(input: &[u8], report_json: report_json::ParsedReportJson) { let report_builder = TestReportBuilder::default(); - - let chunks_ctx = chunks::ParseCtx::new(report_builder, files, sessions); - let mut chunks_stream = chunks::ReportOutputStream::<&str, TestReport, TestReportBuilder> { - input, - state: chunks_ctx, - }; - - chunks::parse_chunks_file - .parse_next(&mut chunks_stream) - .unwrap(); -} 
- -#[divan::bench] -fn simple_chunks_serde() { - let chunks: &[&[u8]] = &[ - // Header and one chunk with an empty line - b"{}\n<<<<< end_of_header >>>>>\n{}\n", - // No header, one chunk with a populated line and an empty line - b"{}\n[1, null, [[0, 1]]]\n", - // No header, two chunks, the second having just one empty line - b"{}\n[1, null, [[0, 1]]]\n\n<<<<< end_of_chunk >>>>>\n{}\n", - // Header, two chunks, the second having multiple data lines and an empty line - b"{}\n<<<<< end_of_header >>>>>\n{}\n[1, null, [[0, 1]]]\n\n<<<<< end_of_chunk >>>>>\n{}\n[1, null, [[0, 1]]]\n[1, null, [[0, 1]]]\n", - ]; - - let report_json = report_json::ParsedReportJson { - files: Default::default(), - sessions: Default::default(), - }; - - for input in chunks { - parse_chunks_file_serde(input, &report_json); - } -} - -// this is currently <300 ms on my machine -#[divan::bench(sample_count = 10)] -fn complex_chunks_serde(bencher: Bencher) { - // this is a ~96M `chunks` file - let chunks = - load_fixture("pyreport/large/worker-c71ddfd4cb1753c7a540e5248c2beaa079fc3341-chunks.txt"); - - // parsing the chunks depends on having loaded the `report_json` - let report = load_fixture( - "pyreport/large/worker-c71ddfd4cb1753c7a540e5248c2beaa079fc3341-report_json.json", - ); - let report_json = parse_report_json(&report); - - bencher.bench(|| parse_chunks_file_serde(&chunks, &report_json)); -} - -fn parse_chunks_file_serde(input: &[u8], report_json: &report_json::ParsedReportJson) { - let mut report_builder = TestReportBuilder::default(); - chunks_serde::parse_chunks_file(input, report_json, &mut report_builder).unwrap(); -} - -#[track_caller] -fn load_fixture(path: &str) -> Vec { - let path = format!("./fixtures/{path}"); - let contents = std::fs::read(path).unwrap(); - - if contents.starts_with(b"version https://git-lfs.github.com/spec/v1") { - panic!("Fixture has not been pulled from Git LFS"); - } - - contents + chunks_serde::parse_chunks_file(input, report_json, report_builder).unwrap(); } diff --git a/core/src/parsers/pyreport/chunks_serde.rs b/core/src/parsers/pyreport/chunks_serde.rs index 9372e7b..2ec60fa 100644 --- a/core/src/parsers/pyreport/chunks_serde.rs +++ b/core/src/parsers/pyreport/chunks_serde.rs @@ -9,11 +9,12 @@ //! - `"labels_index"`: assigns a numeric ID to each label to save space //! //! If the `"labels_index"` key is present, this parser will insert each label -//! into the report as a [`crate::report::models::Context`] and create a mapping +//! into the report as a [`Context`](models::Context) and create a mapping //! in `buf.state.labels_index` from numeric ID in the header to the -//! new `Context`'s ID in the output report. If the `"labels_index"` key is -//! _not_ present, we will populate `buf.state.labels_index` gradually as we -//! encounter new labels during parsing. +//! new [`Context`](models::Context)'s ID in the output report. If the +//! `"labels_index"` key is _not_ present, we will populate +//! `buf.state.labels_index` gradually as we encounter new labels during +//! parsing. //! //! A chunk contains all of the line-by-line measurements for //! a file. The Nth chunk corresponds to the file whose entry in @@ -26,24 +27,24 @@ //! A line may be empty, or it may contain a [`LineRecord`]. //! A [`LineRecord`] itself does not correspond to anything in the output, //! but it's an umbrella that includes all of the data -//! tied to a line/[`CoverageSample`]. +//! tied to a line/[`CoverageSample`](models::CoverageSample). //! //! 
This parser performs all the writes it can to the output -//! stream and only returns a `ReportLine` for tests. The `report_line_or_empty` -//! parser which wraps this and supports empty lines returns `Ok(())`. +//! stream and only returns a [`ReportLine`] for tests. The +//! `report_line_or_empty` parser which wraps this and supports empty lines +//! returns `Ok(())`. use std::{collections::HashMap, fmt, mem, sync::OnceLock}; use memchr::{memchr, memmem}; use serde::{de, de::IgnoredAny, Deserialize}; -use super::report_json::ParsedReportJson; +use super::{chunks::ParseCtx, report_json::ParsedReportJson, utils}; use crate::{ error::CodecovError, report::{ - models, pyreport::{ - types::{self, PyreportCoverage, ReportLine}, + types::{self, CoverageType, MissingBranch, Partial, PyreportCoverage, ReportLine}, CHUNKS_FILE_END_OF_CHUNK, CHUNKS_FILE_HEADER_TERMINATOR, }, Report, ReportBuilder, @@ -52,8 +53,8 @@ use crate::{ pub fn parse_chunks_file( input: &[u8], - _report_json: &ParsedReportJson, - builder: &mut B, + report_json: ParsedReportJson, + mut builder: B, ) -> Result<(), CodecovError> where B: ReportBuilder, @@ -67,47 +68,50 @@ where labels_index.insert(index.clone(), context.id); } + let mut ctx = ParseCtx::new(builder, report_json.files, report_json.sessions); + let mut report_lines = vec![]; let mut chunks = chunks_file.chunks(); + let mut chunk_no = 0; while let Some(mut chunk) = chunks.next_chunk()? { let mut line_no = 0; report_lines.clear(); while let Some(line) = chunk.next_line()? { line_no += 1; if let Some(line) = line { - let coverage_type = match line.1.unwrap_or_default() { - CoverageType::Line => models::CoverageType::Line, - CoverageType::Branch => models::CoverageType::Branch, - CoverageType::Method => models::CoverageType::Method, - }; let sessions = line .2 .into_iter() .map(|session| types::LineSession { session_id: session.0, - coverage: session.1.into(), - branches: None, // TODO - partials: None, // TODO + coverage: session.1, + branches: session.2.into(), + partials: session.3.into(), complexity: None, // TODO }) .collect(); + let datapoints = line + .5 + .map(|dps| dps.into_iter().map(|dp| (dp.0, dp.into())).collect()); let mut report_line = ReportLine { line_no, - coverage: line.0.into(), - coverage_type, + coverage: line.0, + coverage_type: line.1.unwrap_or_default(), sessions, _messages: None, _complexity: None, - datapoints: None, // TODO + datapoints: Some(datapoints), }; report_line.normalize(); report_lines.push(report_line); } } - // TODO: - // utils::save_report_lines()?; + + ctx.chunk.index = chunk_no; + utils::save_report_lines(&report_lines, &mut ctx)?; + chunk_no += 1; } Ok(()) @@ -214,7 +218,7 @@ pub struct Chunk<'d> { input: &'d [u8], } -impl<'d> Chunk<'d> { +impl Chunk<'_> { pub fn present_sessions(&self) -> &[u32] { &self.chunk_header.present_sessions } @@ -230,7 +234,7 @@ impl<'d> Chunk<'d> { let line_record: LineRecord = serde_json::from_slice(line).map_err(ChunksFileParseError::InvalidLineRecord)?; - return Ok(Some(Some(line_record))); + Ok(Some(Some(line_record))) } } @@ -273,7 +277,7 @@ impl Eq for IgnoredAnyEq {} #[derive(Debug, Clone, PartialEq, Eq, Deserialize)] pub struct LineRecord( /// coverage - Coverage, + PyreportCoverage, /// coverage type Option, /// sessions @@ -284,9 +288,9 @@ pub struct LineRecord( /// complexity #[serde(default)] Option, - /// TODO: datapoints + /// datapoints #[serde(default)] - Option, + Option>, ); #[derive(Debug, Clone, PartialEq, Eq, Deserialize)] @@ -294,25 +298,41 @@ pub struct LineSession( /// 
session id usize, /// coverage - Coverage, - /// TODO: branches + PyreportCoverage, + /// branches #[serde(default)] - Option, - /// TODO: partials + Option>, + /// partials #[serde(default)] - Option, + Option>, /// TODO: complexity #[serde(default)] Option, ); -#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize)] -#[serde(try_from = "&str")] -pub enum CoverageType { - #[default] - Line, - Branch, - Method, +#[derive(Debug, Clone, PartialEq, Eq, Deserialize)] +pub struct CoverageDatapoint( + /// session id + u32, + /// coverage + PyreportCoverage, + /// coverage type + #[serde(default)] + Option, + /// labels + #[serde(default)] + Option>, +); + +impl From for types::CoverageDatapoint { + fn from(datapoint: CoverageDatapoint) -> Self { + Self { + session_id: datapoint.0, + _coverage: datapoint.1, + _coverage_type: datapoint.2, + labels: datapoint.3.unwrap_or_default(), + } + } } impl<'s> TryFrom<&'s str> for CoverageType { @@ -328,33 +348,14 @@ impl<'s> TryFrom<&'s str> for CoverageType { } } -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum Coverage { - Partial, - BranchTaken(u32, u32), - HitCount(u32), -} - -impl Into for Coverage { - fn into(self) -> PyreportCoverage { - match self { - Coverage::Partial => PyreportCoverage::Partial(), - Coverage::BranchTaken(covered, total) => { - PyreportCoverage::BranchesTaken { covered, total } - } - Coverage::HitCount(hits) => PyreportCoverage::HitCount(hits), - } - } -} - -impl<'de> Deserialize<'de> for Coverage { - fn deserialize(deserializer: D) -> Result +impl<'de> Deserialize<'de> for PyreportCoverage { + fn deserialize(deserializer: D) -> Result where D: de::Deserializer<'de>, { struct CoverageVisitor; - impl<'de> de::Visitor<'de> for CoverageVisitor { - type Value = Coverage; + impl de::Visitor<'_> for CoverageVisitor { + type Value = PyreportCoverage; fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { formatter.write_str("a coverage value") @@ -365,7 +366,7 @@ impl<'de> Deserialize<'de> for Coverage { E: de::Error, { if v { - Ok(Coverage::Partial) + Ok(PyreportCoverage::Partial()) } else { Err(de::Error::invalid_value(de::Unexpected::Bool(v), &self)) } @@ -375,7 +376,7 @@ impl<'de> Deserialize<'de> for Coverage { where E: de::Error, { - Ok(Coverage::HitCount(value as u32)) + Ok(PyreportCoverage::HitCount(value as u32)) } fn visit_str(self, v: &str) -> Result @@ -387,7 +388,7 @@ impl<'de> Deserialize<'de> for Coverage { let covered: u32 = covered.parse().map_err(|_| invalid())?; let total: u32 = total.parse().map_err(|_| invalid())?; - Ok(Coverage::BranchTaken(covered, total)) + Ok(PyreportCoverage::BranchesTaken { covered, total }) } } @@ -395,72 +396,109 @@ impl<'de> Deserialize<'de> for Coverage { } } +impl<'de> Deserialize<'de> for MissingBranch { + fn deserialize(deserializer: D) -> Result + where + D: de::Deserializer<'de>, + { + struct MissingBranchVisitor; + impl de::Visitor<'_> for MissingBranchVisitor { + type Value = MissingBranch; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("a missing branch value") + } + + fn visit_str(self, v: &str) -> Result + where + E: de::Error, + { + let invalid = || de::Error::invalid_value(de::Unexpected::Str(v), &self); + + if let Some((block, branch)) = v.split_once(":") { + let block: u32 = block.parse().map_err(|_| invalid())?; + let branch: u32 = branch.parse().map_err(|_| invalid())?; + + return Ok(MissingBranch::BlockAndBranch(block, branch)); + } + + if let Some(condition) = v.strip_suffix(":jump") { + let 
condition: u32 = condition.parse().map_err(|_| invalid())?; + + // TODO(swatinem): can we skip saving the `jump` here? + return Ok(MissingBranch::Condition(condition, Some("jump".into()))); + } + + let line: u32 = v.parse().map_err(|_| invalid())?; + Ok(MissingBranch::Line(line)) + } + } + + deserializer.deserialize_any(MissingBranchVisitor) + } +} + #[cfg(test)] mod tests { use super::*; #[test] - fn test_parsing_events() { + fn test_parsing_chunks() { let simple_line_record = LineRecord( - Coverage::HitCount(1), + PyreportCoverage::HitCount(1), None, - vec![LineSession(0, Coverage::HitCount(1), None, None, None)], + vec![LineSession( + 0, + PyreportCoverage::HitCount(1), + None, + None, + None, + )], None, None, None, ); + #[allow(clippy::type_complexity)] let cases: &[( &[u8], // input - HashMap, // labels index - &[(&[u32], &[Option])], // chunks: session ids, line records + &[&[Option]], // chunks: line records )] = &[ ( // Header and one chunk with an empty line b"{}\n<<<<< end_of_header >>>>>\n{}\n", - HashMap::default(), - &[(&[], &[])], + &[&[]], ), ( // No header, one chunk with a populated line and an empty line b"{}\n[1, null, [[0, 1]]]\n", - HashMap::default(), - &[(&[], &[Some(simple_line_record.clone())])], + &[&[Some(simple_line_record.clone())]], ), ( // No header, two chunks, the second having just one empty line b"{}\n[1, null, [[0, 1]]]\n\n<<<<< end_of_chunk >>>>>\n{}\n", - HashMap::default(), - &[(&[], &[Some(simple_line_record.clone())]), (&[], &[])], + &[&[Some(simple_line_record.clone())], &[]], ), ( // Header, two chunks, the second having multiple data lines and an empty line b"{}\n<<<<< end_of_header >>>>>\n{}\n[1, null, [[0, 1]]]\n\n<<<<< end_of_chunk >>>>>\n{}\n[1, null, [[0, 1]]]\n[1, null, [[0, 1]]]\n", - HashMap::default(), &[ - (&[], &[Some(simple_line_record.clone())]), - ( - &[], - &[ - Some(simple_line_record.clone()), - Some(simple_line_record.clone()), - ], - ), + &[Some(simple_line_record.clone())], + &[ + Some(simple_line_record.clone()), + Some(simple_line_record.clone()), + ], ], ), ]; - for (input, expected_labels_index, expected_chunks) in cases { + for (input, expected_chunks) in cases { let chunks_file = ChunksFile::new(input).unwrap(); let mut chunks = chunks_file.chunks(); - assert_eq!(chunks_file.labels_index(), expected_labels_index); - - for (expected_sessions, expected_line_records) in *expected_chunks { + for expected_line_records in *expected_chunks { let mut chunk = chunks.next_chunk().unwrap().unwrap(); - assert_eq!(chunk.present_sessions(), *expected_sessions); - let mut lines = vec![]; while let Some(line) = chunk.next_line().unwrap() { lines.push(line); diff --git a/core/src/parsers/pyreport/report_json.rs b/core/src/parsers/pyreport/report_json.rs index f9a24e0..05da433 100644 --- a/core/src/parsers/pyreport/report_json.rs +++ b/core/src/parsers/pyreport/report_json.rs @@ -214,7 +214,7 @@ struct Session { session_extras: Option, } -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct ParsedReportJson { pub files: HashMap, pub sessions: HashMap, diff --git a/core/src/report/models.rs b/core/src/report/models.rs index 17f63f9..41ca5d0 100644 --- a/core/src/report/models.rs +++ b/core/src/report/models.rs @@ -95,9 +95,12 @@ * and cast back to `u64` when querying. 
*/ +use serde::Deserialize; + use crate::parsers::json::JsonVal; -#[derive(PartialEq, Debug, Clone, Copy, Default)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize)] +#[serde(try_from = "&str")] pub enum CoverageType { #[default] Line = 1, diff --git a/core/src/report/pyreport/types.rs b/core/src/report/pyreport/types.rs index 0da5847..8402b80 100644 --- a/core/src/report/pyreport/types.rs +++ b/core/src/report/pyreport/types.rs @@ -1,5 +1,7 @@ use std::collections::HashMap; +use serde::Deserialize; + pub use super::super::models::CoverageType; use crate::parsers::json::JsonVal; #[cfg(doc)] @@ -10,7 +12,7 @@ use crate::report::models; /// /// Most of the time, we can parse this field into a `HitCount` or /// `BranchesTaken`. -#[derive(Clone, Debug, PartialEq)] +#[derive(Clone, Debug, PartialEq, Eq)] pub enum PyreportCoverage { /// Contains the number of times the target was hit (or sometimes just 0 or /// 1). Most formats represent line and method coverage this way. In some @@ -41,7 +43,7 @@ pub enum Complexity { } /// Enum representing the possible shapes of data about missing branch coverage. -#[derive(Clone, Debug, PartialEq)] +#[derive(Clone, Debug, PartialEq, Eq)] pub enum MissingBranch { /// Identifies a specific branch by its "block" and "branch" numbers chosen /// by the instrumentation. Lcov does it this way. @@ -57,7 +59,7 @@ pub enum MissingBranch { } /// Struct representing a subspan of a single line and its coverage status. -#[derive(Debug, Clone, PartialEq)] +#[derive(Debug, Clone, PartialEq, Eq, Deserialize)] pub struct Partial { pub start_col: Option, pub end_col: Option, @@ -122,7 +124,7 @@ pub enum RawLabel { /// An object that is similar to a [`LineSession`], containing coverage /// measurements specific to a session. It is mostly redundant and ignored in /// this parser, save for the `labels` field which is not found anywhere else. -#[derive(Clone, Debug, PartialEq)] +#[derive(Clone, Debug, PartialEq, Eq, Deserialize)] pub struct CoverageDatapoint { /// This ID indicates which session the measurement was taken in. 
It can be /// used as a key in `buf.state.report_json_sessions` to get the ID of a diff --git a/test_utils/src/fixtures.rs b/test_utils/src/fixtures.rs index cdd08c7..e917cf2 100644 --- a/test_utils/src/fixtures.rs +++ b/test_utils/src/fixtures.rs @@ -66,9 +66,11 @@ pub fn read_fixture( name: &str, ) -> Result, &'static str> { // Just make sure the file exists and that it has been pulled from Git LFS - let _file = open_fixture(format, size, name)?; + let mut file = open_fixture(format, size, name)?; // Actually read and return the contents - let path = fixture_dir(format, size).join(name); - std::fs::read(path).map_err(|_| "failed to read file") + let mut buf = Vec::new(); + file.read_to_end(&mut buf) + .map_err(|_| "failed to read file")?; + Ok(buf) } From 70fa498befb2fc9ca49875bb7b66c37442dc9fad Mon Sep 17 00:00:00 2001 From: Arpad Borsos Date: Tue, 12 Nov 2024 13:36:39 +0100 Subject: [PATCH 5/9] insert all datapoints --- core/src/parsers/pyreport/chunks_serde.rs | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/core/src/parsers/pyreport/chunks_serde.rs b/core/src/parsers/pyreport/chunks_serde.rs index 2ec60fa..af887ae 100644 --- a/core/src/parsers/pyreport/chunks_serde.rs +++ b/core/src/parsers/pyreport/chunks_serde.rs @@ -69,6 +69,7 @@ where } let mut ctx = ParseCtx::new(builder, report_json.files, report_json.sessions); + ctx.labels_index = labels_index; let mut report_lines = vec![]; @@ -92,9 +93,23 @@ where }) .collect(); - let datapoints = line - .5 - .map(|dps| dps.into_iter().map(|dp| (dp.0, dp.into())).collect()); + let datapoints: Option> = line.5.map(|dps| { + dps.into_iter() + .map(|dp| (dp.0, types::CoverageDatapoint::from(dp))) + .collect() + }); + + if let Some(datapoints) = &datapoints { + for datapoint in datapoints.values() { + for label in &datapoint.labels { + if !ctx.labels_index.contains_key(label) { + let context = ctx.db.report_builder.insert_context(label)?; + ctx.labels_index.insert(label.into(), context.id); + } + } + } + } + let mut report_line = ReportLine { line_no, coverage: line.0, From 35f0674cf78653df2ac13d79a65b057bba4538ed Mon Sep 17 00:00:00 2001 From: Arpad Borsos Date: Tue, 12 Nov 2024 13:50:38 +0100 Subject: [PATCH 6/9] rip out old chunks parser --- core/benches/pyreport.rs | 4 +- core/src/error.rs | 2 +- core/src/parsers/pyreport/chunks.rs | 2284 ++++----------------- core/src/parsers/pyreport/chunks_serde.rs | 527 ----- core/src/parsers/pyreport/mod.rs | 39 +- core/tests/test_pyreport_shim.rs | 28 +- 6 files changed, 462 insertions(+), 2422 deletions(-) delete mode 100644 core/src/parsers/pyreport/chunks_serde.rs diff --git a/core/benches/pyreport.rs b/core/benches/pyreport.rs index fceffef..8ef9849 100644 --- a/core/benches/pyreport.rs +++ b/core/benches/pyreport.rs @@ -1,7 +1,7 @@ use std::collections::HashMap; use codecov_rs::{ - parsers::pyreport::{chunks_serde, report_json}, + parsers::pyreport::{chunks, report_json}, test_utils::test_report::TestReportBuilder, }; use criterion::{criterion_group, criterion_main, Criterion}; @@ -104,5 +104,5 @@ fn complex_chunks(c: &mut Criterion) { fn parse_chunks_file_serde(input: &[u8], report_json: report_json::ParsedReportJson) { let report_builder = TestReportBuilder::default(); - chunks_serde::parse_chunks_file(input, report_json, report_builder).unwrap(); + chunks::parse_chunks_file(input, report_json, report_builder).unwrap(); } diff --git a/core/src/error.rs b/core/src/error.rs index b3d0563..e184141 100644 --- a/core/src/error.rs +++ b/core/src/error.rs @@ 
-1,6 +1,6 @@ use thiserror::Error; -use crate::parsers::pyreport::chunks_serde::ChunksFileParseError; +use crate::parsers::pyreport::chunks::ChunksFileParseError; pub type Result = std::result::Result; diff --git a/core/src/parsers/pyreport/chunks.rs b/core/src/parsers/pyreport/chunks.rs index 0c8c4a9..d4a8562 100644 --- a/core/src/parsers/pyreport/chunks.rs +++ b/core/src/parsers/pyreport/chunks.rs @@ -1,30 +1,55 @@ -use std::{collections::HashMap, fmt, fmt::Debug}; - -use winnow::{ - combinator::{ - alt, cut_err, delimited, empty, eof, opt, peek, preceded, separated, separated_pair, seq, - terminated, - }, - error::{ContextError, ErrMode, ErrorKind, FromExternalError, StrContext}, - stream::Stream, - PResult, Parser, Stateful, -}; - -use super::{ - super::{ - common::{ - winnow::{nullable, parse_u32, ws, StrStream}, - ReportBuilderCtx, +//! A parser for the `chunks` file format. +//! +//! A chunks file contains an optional header and a series of 1 or more +//! "chunks", separated by an `END_OF_CHUNK` terminator. +//! +//! Chunks files sometimes begin with a JSON object followed by an +//! `END_OF_HEADER` terminator string. +//! The JSON object contains: +//! - `"labels_index"`: assigns a numeric ID to each label to save space +//! +//! If the `"labels_index"` key is present, this parser will insert each label +//! into the report as a [`Context`](models::Context) and create a mapping +//! in `buf.state.labels_index` from numeric ID in the header to the +//! new [`Context`](models::Context)'s ID in the output report. If the +//! `"labels_index"` key is _not_ present, we will populate +//! `buf.state.labels_index` gradually as we encounter new labels during +//! parsing. +//! +//! A chunk contains all of the line-by-line measurements for +//! a file. The Nth chunk corresponds to the file whose entry in +//! `buf.state.report_json_files` has N in its `chunks_index` field. +//! +//! Each new chunk will reset `buf.state.chunk.current_line` to 0 when it starts +//! and increment `buf.state.chunk.index` when it ends so that the next chunk +//! can associate its data with the correct file. +//! +//! A line may be empty, or it may contain a [`LineRecord`]. +//! A [`LineRecord`] itself does not correspond to anything in the output, +//! but it's an umbrella that includes all of the data +//! tied to a line/[`CoverageSample`](models::CoverageSample). +//! +//! This parser performs all the writes it can to the output +//! stream and only returns a [`ReportLine`] for tests. The +//! `report_line_or_empty` parser which wraps this and supports empty lines +//! returns `Ok(())`. 
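For orientation, the format is small enough to sketch inline. The sample below is assembled from the same inputs the tests at the bottom of this file use; the file header and the per-chunk headers are all empty `{}` objects here, but in real reports they may carry `labels_index` and `present_sessions` respectively:

    // A file header, then two chunks: the first has one populated line
    // record plus one empty line, the second has two populated records.
    // Line records are JSON arrays, one per source line; a blank line is
    // a source line with no coverage data.
    const SAMPLE_CHUNKS_FILE: &[u8] = b"{}\n<<<<< end_of_header >>>>>\n\
        {}\n[1, null, [[0, 1]]]\n\n\
        <<<<< end_of_chunk >>>>>\n\
        {}\n[1, null, [[0, 1]]]\n[1, null, [[0, 1]]]\n";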
+ +use std::{collections::HashMap, fmt, mem, sync::OnceLock}; + +use memchr::{memchr, memmem}; +use serde::{de, de::IgnoredAny, Deserialize}; + +use super::{report_json::ParsedReportJson, utils}; +use crate::{ + error::CodecovError, + parsers::common::ReportBuilderCtx, + report::{ + pyreport::{ + types::{self, CoverageType, MissingBranch, Partial, PyreportCoverage, ReportLine}, + CHUNKS_FILE_END_OF_CHUNK, CHUNKS_FILE_HEADER_TERMINATOR, }, - json::{json_value, parse_object, parse_str, JsonMap, JsonVal}, + Report, ReportBuilder, }, - utils, -}; -#[cfg(doc)] -use crate::report::models; -use crate::report::{ - pyreport::{types::*, CHUNKS_FILE_END_OF_CHUNK, CHUNKS_FILE_HEADER_TERMINATOR}, - Report, ReportBuilder, }; #[derive(PartialEq, Debug)] @@ -67,8 +92,6 @@ pub struct ParseCtx> { pub report_json_sessions: HashMap, } -pub type ReportOutputStream = Stateful>; - impl> ParseCtx { pub fn new( report_builder: B, @@ -88,7 +111,7 @@ impl> ParseCtx { } } -impl> Debug for ParseCtx { +impl> fmt::Debug for ParseCtx { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("ParseCtx") .field("db", &self.db) @@ -98,1896 +121,477 @@ impl> Debug for ParseCtx { } } -/// Parses the possible values of the "coverage" field in a [`ReportLine`] or -/// [`LineSession`]. See [`PyreportCoverage`]. Most of the time, this field can -/// be parsed into a `HitCount` or `BranchesTaken`. -/// -/// Clojure's Cloverage tool [uses `true` for partial coverage](https://github.com/cloverage/cloverage/blob/87fd10f82ea7c0f47c03354105e513b160d1e047/cloverage/src/cloverage/report/codecov.clj#L10) -/// with no information about covered/missed branches, and this quirk made its -/// way into the chunks format as well. -/// -/// Examples: `0`, `1`, `"0/2"`, `"3/4"`, `"2/2"`, `true` -pub fn coverage>( - buf: &mut ReportOutputStream, -) -> PResult { - alt(( - // Clojure's Cloverage tool does this. - "true".value(PyreportCoverage::Partial()), - // Examples: "0/2", "1/2", "2/2" - delimited('"', separated_pair(parse_u32, '/', parse_u32), '"') - .map(move |(covered, total)| PyreportCoverage::BranchesTaken { covered, total }), - // Examples: 0, 40 - parse_u32.map(PyreportCoverage::HitCount), - )) - .context(StrContext::Label("coverage")) - .parse_next(buf) -} - -/// Parses the coverage type described by a [`ReportLine`]. Beware: this field -/// may be inaccurate. -/// -/// For example, in a chunks file for a Go project, the "coverage type" field is -/// always `null` when some of the values in the "coverage" field indicate the -/// line describes branch coverage. -/// -/// Examples: `null`, `"line"`, `"b"`, `"branch"`, `"m"`, `"method"` -pub fn coverage_type>( - buf: &mut ReportOutputStream, -) -> PResult { - alt(( - alt(("\"line\"", "null")).value(CoverageType::Line), - alt(("\"b\"", "\"branch\"")).value(CoverageType::Branch), - alt(("\"m\"", "\"method\"")).value(CoverageType::Method), - )) - .context(StrContext::Label("coverage_type")) - .parse_next(buf) -} - -/// Parses value of the "complexity" field in a [`ReportLine`] or -/// [`LineSession`]. 
-/// -/// Examples: `1`, `3`, `[0, 1]`, `[2, 2]` -pub fn complexity>( - buf: &mut ReportOutputStream, -) -> PResult { - alt(( - delimited( - ('[', ws), - separated_pair(parse_u32, (ws, ',', ws), parse_u32), - (ws, ']'), - ) - .map(move |(covered, total)| Complexity::PathsTaken { covered, total }), - parse_u32.map(Complexity::Total), - )) - .context(StrContext::Label("complexity")) - .parse_next(buf) -} - -/// Attempts to parse the values in the "branches" field of a [`LineSession`] -/// which is a list of missing branches. -/// -/// There are myriad ways different coverage formats have represented branch -/// coverage data and they each show up in chunks files in their own quirky way. -/// -/// - `["0:0", "0:1", "1:0", "1:1"]` is an example of -/// [`MissingBranch::BlockAndBranch`] coverage. This is how the chunks file -/// represents Lcov `BRDA` branch records. -/// - `["0:jump", "1", "2", "3"]` is an example of [`MissingBranch::Condition`] -/// coverage. This is how Cobertura does it sometimes? -/// - `["26", "27"]` is an example of [`MissingBranch::Line`] coverage. This is -/// how Cobertura does it when generated by coverage.py. -/// -/// We lack a way to convert between formats so we are unable to normalize this -/// data. -/// -/// [There may yet be more ways this shows -/// up](https://github.com/codecov/worker/blob/07405e0ae925f00aa7bb3e2d828537010901154b/services/report/languages/cobertura.py#L112-L114). -/// We'll try our best, and that'll have to do. -pub fn missing_branches<'a, S, R: Report, B: ReportBuilder>( - buf: &mut ReportOutputStream, -) -> PResult> -where - S: StrStream, - S: Stream, -{ - let block_and_branch = separated_pair(parse_u32, ':', parse_u32); - let block_and_branch = delimited('"', block_and_branch, '"'); - let block_and_branch = - block_and_branch.map(move |(block, branch)| MissingBranch::BlockAndBranch(block, branch)); - - let condition_type = opt(preceded(':', "jump")); - - let condition = (parse_u32, condition_type); - let condition = delimited('"', condition, '"'); - let condition = condition.map(move |(cond, cond_type)| { - MissingBranch::Condition(cond, cond_type.map(move |s: &str| s.to_string())) - }); - - let line = delimited('"', parse_u32, '"').map(MissingBranch::Line); - - delimited( - ('[', ws), - alt(( - // Match 1 or more in the first two cases. If we matched 0 or more, the first case - // would technically always succeed and never try later ones. - separated(1.., line, (ws, ',', ws)), - separated(1.., block_and_branch, (ws, ',', ws)), - // Match 0 or more in the last case to allow for an empty list. - separated(0.., condition, (ws, ',', ws)), - )), - (ws, ']'), - ) - .context(StrContext::Label("missing_branches")) - .parse_next(buf) -} - -/// Parses values in the "partials" field of a [`LineSession`]. These values -/// don't necessarily have to do with partial branch coverage; what they -/// describe is the coverage status of different subspans of a single line. -/// -/// Examples: -/// - `[null, 10, 0]`: This line was not covered from its start until column 10 -/// - `[11, 30, 1]`: This line was covered from column 11 to column 30 -/// - `[31, 40, 0]`: This line was not covered from column 31 to column 40 -/// - `[41, null, 1]`: This line was covered from column 41 until its end -/// -/// Not all subspans of a line will necessarily be covered. -/// -/// Some coverage formats report coverage "spans" or "locs" which can be spread -/// across multiple lines. 
Our parsers generally only record spans that start -/// and end on the same line in the chunks file, or we split a single span into -/// two: one for the start line and one for the end line. The fact that lines -/// between are part of the span is lost. -pub fn partial_spans>( - buf: &mut ReportOutputStream, -) -> PResult> { - let span = separated_pair(nullable(parse_u32), (ws, ',', ws), nullable(parse_u32)); - let span_with_coverage = separated_pair(span, (ws, ',', ws), coverage).map( - move |((start_col, end_col), coverage)| Partial { - start_col, - end_col, - coverage, - }, - ); - let span_with_coverage = delimited('[', span_with_coverage, ']'); - - delimited('[', separated(0.., span_with_coverage, (ws, ',', ws)), ']') - .context(StrContext::Label("partial_spans")) - .parse_next(buf) -} - -/// Parses a [`LineSession`]. Each [`LineSession`] corresponds to a -/// [`CoverageSample`](models::CoverageSample) in the output report. -/// -/// A [`ReportLine`] has a [`LineSession`] for each upload ("session") sent to -/// us for a commit. The [`LineSession`] contains the coverage measurements for -/// that session. -/// -/// Trailing null fields may be omitted. -pub fn line_session<'a, S, R: Report, B: ReportBuilder>( - buf: &mut ReportOutputStream, -) -> PResult +pub fn parse_chunks_file( + input: &[u8], + report_json: ParsedReportJson, + mut builder: B, +) -> Result<(), CodecovError> where - S: StrStream, - S: Stream, + B: ReportBuilder, + R: Report, { - seq! {LineSession { - _: '[', - session_id: parse_u32.map(|n| n as usize), - _: (ws, ',', ws), - coverage: coverage, - _: opt((ws, ',', ws)), - branches: opt(nullable(missing_branches)), - _: opt((ws, ',', ws)), - partials: opt(nullable(partial_spans)), - _: opt((ws, ',', ws)), - complexity: opt(nullable(complexity)), - _: ']', - }} - .context(StrContext::Label("line_session")) - .parse_next(buf) -} - -/// No idea what this field contains. Guessing it's JSON so if we ever encounter -/// it we can at least consume it off the stream and continue parsing. -pub fn messages<'a, S, R: Report, B: ReportBuilder>( - buf: &mut ReportOutputStream, -) -> PResult -where - S: StrStream, - S: Stream, -{ - json_value - .context(StrContext::Label("messages")) - .parse_next(buf) -} - -/// Parses an individual [`RawLabel`] in a [`CoverageDatapoint`]. -/// -/// Examples: -/// - `"Th2dMtk4M_codecov"` -/// - `"tests/unit/test_analytics_tracking.py::test_get_tools_manager"` -/// - `1` -/// - `5` -/// -/// If the label is already in `buf.state.labels_index`, return it as a string. -/// If it's not, insert it into the database, insert a mapping from the label to -/// the DB PK, and then return it as a string. 
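That lookup-or-insert contract survives the rewrite; it just runs in a plain loop over datapoint labels instead of inside a winnow parser. A minimal sketch of the invariant, assuming the index maps label strings to `Context` row IDs (the `i64` ID type and the `insert_context` closure are stand-ins for this sketch, not the crate's real signatures):

    use std::collections::HashMap;

    // Resolve a label against the index, inserting it on first sight so
    // that every label maps to exactly one `Context` row.
    fn resolve_label(
        index: &mut HashMap<String, i64>,
        insert_context: &mut impl FnMut(&str) -> i64,
        label: &str,
    ) -> i64 {
        *index
            .entry(label.to_string())
            .or_insert_with(|| insert_context(label))
    }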
-pub fn label>( - buf: &mut ReportOutputStream, -) -> PResult { - let raw_label = alt(( - parse_u32.map(RawLabel::LabelId), - parse_str.map(RawLabel::LabelName), - )) - .context(StrContext::Label("label")) - .parse_next(buf)?; - - let labels_index_key = match raw_label { - RawLabel::LabelId(id) => id.to_string(), - RawLabel::LabelName(name) => name, - }; + let chunks_file = ChunksFile::new(input)?; - match buf.state.labels_index.get(&labels_index_key) { - Some(_) => Ok(labels_index_key), - None => { - let context = buf - .state - .db - .report_builder - .insert_context(&labels_index_key) - .map_err(|e| ErrMode::from_external_error(buf, ErrorKind::Fail, e))?; - buf.state.labels_index.insert(context.name, context.id); - Ok(labels_index_key) - } + let mut labels_index = HashMap::with_capacity(chunks_file.labels_index().len()); + for (index, name) in chunks_file.labels_index() { + let context = builder.insert_context(name)?; + labels_index.insert(index.clone(), context.id); } -} -/// Parses the (largely redundant) [`CoverageDatapoint`]. Most of its fields are -/// also found on [`ReportLine`] or [`LineSession`], except for the `labels` -/// field. -/// -/// Technically `_coverage_type` is optional, but the way it gets serialized -/// when it's missing is identical to the way we serialize -/// [`CoverageType::Line`] so there's no way to tell -/// which it is when deserializing. -pub fn coverage_datapoint>( - buf: &mut ReportOutputStream, -) -> PResult<(u32, CoverageDatapoint)> { - let datapoint = seq! {CoverageDatapoint { - _: '[', - session_id: parse_u32, - _: (ws, ',', ws), - _coverage: coverage, - _: (ws, ',', ws), - _coverage_type: nullable(coverage_type), - _: (ws, ',', ws), - labels: delimited('[', separated(0.., label, (ws, ',', ws)), ']'), - _: ']', - }} - .context(StrContext::Label("coverage_datapoint")) - .parse_next(buf)?; - Ok((datapoint.session_id, datapoint)) -} + let mut ctx = ParseCtx::new(builder, report_json.files, report_json.sessions); + ctx.labels_index = labels_index; + + let mut report_lines = vec![]; + + let mut chunks = chunks_file.chunks(); + let mut chunk_no = 0; + while let Some(mut chunk) = chunks.next_chunk()? { + let mut line_no = 0; + report_lines.clear(); + while let Some(line) = chunk.next_line()? { + line_no += 1; + if let Some(line) = line { + let sessions = line + .2 + .into_iter() + .map(|session| types::LineSession { + session_id: session.0, + coverage: session.1, + branches: session.2.into(), + partials: session.3.into(), + complexity: None, // TODO + }) + .collect(); + + let datapoints: Option> = line.5.map(|dps| { + dps.into_iter() + .map(|dp| (dp.0, types::CoverageDatapoint::from(dp))) + .collect() + }); + + if let Some(datapoints) = &datapoints { + for datapoint in datapoints.values() { + for label in &datapoint.labels { + if !ctx.labels_index.contains_key(label) { + let context = ctx.db.report_builder.insert_context(label)?; + ctx.labels_index.insert(label.into(), context.id); + } + } + } + } + + let mut report_line = ReportLine { + line_no, + coverage: line.0, + coverage_type: line.1.unwrap_or_default(), + sessions, + _messages: None, + _complexity: None, + datapoints: Some(datapoints), + }; + report_line.normalize(); + report_lines.push(report_line); + } + } -/// Parses a [`ReportLine`]. A [`ReportLine`] itself does not correspond to -/// anything in the output, but it's an umbrella that includes all of the data -/// tied to a line/[`CoverageSample`](models::CoverageSample). 
-/// -/// This parser performs all the writes it can to the output -/// stream and only returns a [`ReportLine`] for tests. The -/// `report_line_or_empty` parser which wraps this and supports empty lines -/// returns `Ok(())`. -pub fn report_line<'a, S, R: Report, B: ReportBuilder>( - buf: &mut ReportOutputStream, -) -> PResult -where - S: StrStream, - S: Stream, -{ - let line_no = buf.state.chunk.current_line; - let mut report_line = seq! {ReportLine { - line_no: empty.value(line_no), - _: '[', - coverage: coverage, - _: (ws, ',', ws), - coverage_type: coverage_type, - _: (ws, ',', ws), - sessions: delimited('[', separated(0.., line_session, (ws, ',', ws)), ']'), -// _: (ws, ',', ws), - _messages: opt(preceded((ws, ',', ws), nullable(messages))), -// _: (ws, ',', ws), - _complexity: opt(preceded((ws, ',', ws), nullable(complexity))), -// _: (ws, ',', ws), - datapoints: opt(preceded((ws, ',', ws), nullable(delimited('[', separated(0.., coverage_datapoint, (ws, ',', ws)), ']')))), - _: ']', - }} - .context(StrContext::Label("report_line")) - .parse_next(buf)?; - - // Fix issues like recording branch coverage with `CoverageType::Method` - let (correct_coverage, correct_type) = - normalize_coverage_measurement(&report_line.coverage, &report_line.coverage_type); - report_line.coverage = correct_coverage; - report_line.coverage_type = correct_type; - - // Fix the `coverage` values in each `LineSession` as well - for line_session in report_line.sessions.iter_mut() { - let (correct_coverage, _) = - normalize_coverage_measurement(&line_session.coverage, &report_line.coverage_type); - line_session.coverage = correct_coverage; + ctx.chunk.index = chunk_no; + utils::save_report_lines(&report_lines, &mut ctx)?; + chunk_no += 1; } - Ok(report_line) -} - -/// Parses each line in a chunk. A line may be empty, or it may contain a -/// [`ReportLine`]. Either way, we need to update the `current_line` value in -/// our parser context. -/// -/// The `report_line` parser writes all the data it can to the output -/// stream so we don't actually need to return anything to our caller. -pub fn report_line_or_empty<'a, S, R: Report, B: ReportBuilder>( - buf: &mut ReportOutputStream, -) -> PResult> -where - S: StrStream, - S: Stream, -{ - buf.state.chunk.current_line += 1; - - // A line is empty if the next character is `\n` or EOF. We don't consume that - // next character from the stream though - we leave it there as either the - // delimeter between lines or part of `CHUNKS_FILE_END_OF_CHUNK`. - let empty_line = peek(alt((eof, "\n"))).map(|_| None); - let populated_line = report_line.map(Some); - alt((populated_line, empty_line)) - .context(StrContext::Label("report_line_or_empty")) - .parse_next(buf) -} - -/// Each chunk may begin with a JSON object containing: -/// - "present_sessions": a list of sessions referenced -/// -/// TODO: Verify that all keys are known. -pub fn chunk_header>( - buf: &mut ReportOutputStream, -) -> PResult> { - terminated(parse_object, '\n') - .context(StrContext::Label("chunk_header")) - .parse_next(buf) + Ok(()) } -/// Parses a "chunk". A chunk contains all of the line-by-line measurements for -/// a file. The Nth chunk corresponds to the file whose entry in -/// `buf.state.report_json_files` has N in its `chunks_index` field. -/// -/// Each new chunk will reset `buf.state.chunk.current_line` to 0 when it starts -/// and increment `buf.state.chunk.index` when it ends so that the next chunk -/// can associate its data with the correct file. 
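The rewrite keeps this bookkeeping but hoists it out of parser state into the plain nested loop of `parse_chunks_file` above: the line counter restarts for every chunk while the chunk counter only moves forward, which is what ties the Nth chunk to the Nth file. A self-contained sketch of just the counting:

    // Line numbers restart per chunk; empty lines still advance the
    // counter so later populated lines keep their correct line number.
    fn assign_line_numbers(chunks: &[&[Option<&str>]]) -> Vec<(usize, u32)> {
        let mut out = vec![];
        for (chunk_no, lines) in chunks.iter().enumerate() {
            let mut line_no = 0;
            for line in *lines {
                line_no += 1;
                if line.is_some() {
                    out.push((chunk_no, line_no));
                }
            }
        }
        out
    }

For example, `assign_line_numbers(&[&[Some("[1, null, [[0, 1]]]"), None]])` returns `[(0, 1)]`: the empty second line consumed line number 2 but produced no record.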
-pub fn chunk<'a, S, R: Report, B: ReportBuilder>( - buf: &mut ReportOutputStream, -) -> PResult<()> -where - S: StrStream, - S: Stream, -{ - // New chunk, start back at line 0. - buf.state.chunk.current_line = 0; - - let empty_chunk = terminated("null", peek(alt((eof, "\n")))).map(|_| Vec::new()); - let report_lines = preceded( - cut_err(chunk_header), - cut_err(separated(1.., report_line_or_empty, '\n')), - ); - - let parsed_lines: Vec<_> = alt((empty_chunk, report_lines)) - .context(StrContext::Label("chunk")) - .parse_next(buf)?; - - let parsed_lines: Vec = parsed_lines.into_iter().flatten().collect(); - - utils::save_report_lines(parsed_lines.as_slice(), &mut buf.state) - .map_err(|e| ErrMode::from_external_error(buf, ErrorKind::Fail, e))?; - - // Advance our chunk index so we can associate the data from the next chunk with - // the correct file from the report JSON. - buf.state.chunk.index += 1; - - Ok(()) +#[derive(Debug, thiserror::Error)] +pub enum ChunksFileParseError { + #[error("unexpected EOF")] + UnexpectedEof, + #[error("unexpected input")] + UnexpectedInput, + #[error("invalid file header")] + InvalidFileHeader(#[source] serde_json::Error), + #[error("invalid chunk header")] + InvalidChunkHeader(#[source] serde_json::Error), + #[error("invalid line record")] + InvalidLineRecord(#[source] serde_json::Error), } -/// Chunks files sometimes begin with a JSON object followed by a terminator -/// string. The JSON object contains: -/// - `"labels_index"`: assigns a numeric ID to each label to save space -/// -/// If the `"labels_index"` key is present, this parser will insert each label -/// into the report as a [`crate::report::models::Context`] and create a mapping -/// in `buf.state.labels_index` from numeric ID in the header to the -/// new `Context`'s ID in the output report. If the `"labels_index"` key is -/// _not_ present, we will populate `buf.state.labels_index` gradually as we -/// encounter new labels during parsing. -pub fn chunks_file_header>( - buf: &mut ReportOutputStream, -) -> PResult<()> { - let header = terminated(parse_object, CHUNKS_FILE_HEADER_TERMINATOR) - .context(StrContext::Label("chunks_file_header")) - .parse_next(buf)?; - - let labels_iter = header - .get("labels_index") - .and_then(JsonVal::as_object) - .into_iter() - .flatten(); - for (index, name) in labels_iter { - let Some(name) = name.as_str() else { - return Err(ErrMode::Cut(ContextError::new())); - }; - let context = buf - .state - .db - .report_builder - .insert_context(name) - .map_err(|e| ErrMode::from_external_error(buf, ErrorKind::Fail, e))?; - buf.state.labels_index.insert(index.clone(), context.id); +impl PartialEq for ChunksFileParseError { + fn eq(&self, other: &Self) -> bool { + core::mem::discriminant(self) == core::mem::discriminant(other) } - - Ok(()) } +impl Eq for ChunksFileParseError {} -/// Parses a chunks file. A chunks file contains an optional header and a series -/// of 1 or more "chunks" separated by an `CHUNKS_FILE_END_OF_CHUNK` terminator. 
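Where the old entry point drove winnow combinators over a `Stateful` stream, the new `ChunksFile` is a pull API: `ChunksFile::new` splits off the file header, `Chunks::next_chunk` advances one terminator at a time, and `Chunk::next_line` returns `Ok(None)` once the chunk is exhausted and `Ok(Some(None))` for an empty line. A usage sketch, mirroring the test at the bottom of this file:

    // Count populated line records across every chunk of a chunks file.
    fn count_populated_lines(input: &[u8]) -> Result<usize, ChunksFileParseError> {
        let chunks_file = ChunksFile::new(input)?;
        let mut chunks = chunks_file.chunks();
        let mut populated = 0;
        while let Some(mut chunk) = chunks.next_chunk()? {
            while let Some(line) = chunk.next_line()? {
                // `line` is an `Option<LineRecord>`; `None` is an empty line.
                populated += usize::from(line.is_some());
            }
        }
        Ok(populated)
    }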
-pub fn parse_chunks_file<'a, S, R: Report, B: ReportBuilder>( - buf: &mut ReportOutputStream, -) -> PResult<()> -where - S: StrStream, - S: Stream, -{ - let _: Vec<_> = preceded( - opt(chunks_file_header), - separated(1.., chunk, CHUNKS_FILE_END_OF_CHUNK), - ) - .context(StrContext::Label("parse_chunks_file")) - .parse_next(buf)?; - - Ok(()) +#[derive(Debug)] +pub struct ChunksFile<'d> { + file_header: FileHeader, + input: &'d [u8], } -#[cfg(test)] -mod tests { - use winnow::error::AddContext; - - use super::*; - use crate::{ - report::models::*, - test_utils::test_report::{TestReport, TestReportBuilder}, - }; - - type TestStream<'a> = ReportOutputStream<&'a str, TestReport, TestReportBuilder>; - - struct Ctx { - parse_ctx: ParseCtx, - } - - fn setup() -> Ctx { - let report_builder = TestReportBuilder::default(); - let report_json_files = HashMap::from([(0, 0), (1, 1), (2, 2)]); - let report_json_sessions = HashMap::from([(0, 0), (1, 1), (2, 2)]); - - let parse_ctx = ParseCtx::new(report_builder, report_json_files, report_json_sessions); - - Ctx { parse_ctx } - } +impl<'d> ChunksFile<'d> { + pub fn new(mut input: &'d [u8]) -> Result { + static HEADER_FINDER: OnceLock = OnceLock::new(); + let header_finder = + HEADER_FINDER.get_or_init(|| memmem::Finder::new(CHUNKS_FILE_HEADER_TERMINATOR)); + + let file_header = if let Some(pos) = header_finder.find(input) { + let header_bytes = &input[..pos]; + input = &input[pos + header_finder.needle().len()..]; + let file_header: FileHeader = serde_json::from_slice(header_bytes) + .map_err(ChunksFileParseError::InvalidFileHeader)?; + file_header + } else { + FileHeader::default() + }; - fn create_context_error(contexts: Vec) -> ContextError { - let mut err = ContextError::new(); - for context in contexts { - err = err.add_context(&"", context.clone()); - } - err + Ok(Self { file_header, input }) } - fn backtrack_error_with_contexts(contexts: Vec) -> ErrMode { - ErrMode::Backtrack(create_context_error(contexts)) + pub fn labels_index(&self) -> &HashMap { + &self.file_header.labels_index } - fn cut_error_with_contexts(contexts: Vec) -> ErrMode { - ErrMode::Cut(create_context_error(contexts)) + pub fn chunks(&self) -> Chunks { + Chunks { input: self.input } } +} - #[test] - fn test_pyreport_coverage() { - let test_ctx = setup(); - let mut buf = TestStream { - input: "", - state: test_ctx.parse_ctx, - }; - - let test_cases = [ - ("1", Ok(PyreportCoverage::HitCount(1))), - ("3", Ok(PyreportCoverage::HitCount(3))), - ("1.23456e5", Ok(PyreportCoverage::HitCount(123456))), - // Clamp values to u32 range - ("99999999999999", Ok(PyreportCoverage::HitCount(u32::MAX))), - ( - "\"1/2\"", - Ok(PyreportCoverage::BranchesTaken { - covered: 1, - total: 2, - }), - ), - ( - "\"4/4\"", - Ok(PyreportCoverage::BranchesTaken { - covered: 4, - total: 4, - }), - ), - ("true", Ok(PyreportCoverage::Partial())), - // Malformed inputs - ( - "malformed", - Err(backtrack_error_with_contexts(vec![StrContext::Label( - "coverage", - )])), - ), - ( - "false", - Err(backtrack_error_with_contexts(vec![StrContext::Label( - "coverage", - )])), - ), - ( - "\"true\"", - Err(backtrack_error_with_contexts(vec![StrContext::Label( - "coverage", - )])), - ), - ( - "\"1\"", - Err(backtrack_error_with_contexts(vec![StrContext::Label( - "coverage", - )])), - ), - ( - "\"1/\"", - Err(backtrack_error_with_contexts(vec![StrContext::Label( - "coverage", - )])), - ), - ( - "\"/2\"", - Err(backtrack_error_with_contexts(vec![StrContext::Label( - "coverage", - )])), - ), - ( - "\"1/2", - 
Err(backtrack_error_with_contexts(vec![StrContext::Label( - "coverage", - )])), - ), - // TODO: Make these cases error. Currently this parser accepts any number and - // clamps/truncates to u32. - ("3.4", Ok(PyreportCoverage::HitCount(3))), - ("-3", Ok(PyreportCoverage::HitCount(0))), - // TODO: Make this case error or clamp to fractions <= 1 - ( - "\"5/4\"", - Ok(PyreportCoverage::BranchesTaken { - covered: 5, - total: 4, - }), - ), - // This input is invalid because it's not encapsulated by ""s. Ideally we would - // error here, but instead we parse this as `HitCount(1)` and rely on - // the parent to yell when it encounters the `/` instead of a `,` or - // whatever. - ("1/2", Ok(PyreportCoverage::HitCount(1))), - ]; +pub struct Chunks<'d> { + input: &'d [u8], +} - for test_case in test_cases { - buf.input = test_case.0; - assert_eq!(coverage.parse_next(&mut buf), test_case.1); +impl<'d> Chunks<'d> { + pub fn next_chunk(&mut self) -> Result>, ChunksFileParseError> { + if self.input.is_empty() { + return Ok(None); } - } - - #[test] - fn test_coverage_type() { - let test_ctx = setup(); - let mut buf = TestStream { - input: "", - state: test_ctx.parse_ctx, - }; - let test_cases = [ - ("null", Ok(CoverageType::Line)), - ("\"line\"", Ok(CoverageType::Line)), - ("\"b\"", Ok(CoverageType::Branch)), - ("\"branch\"", Ok(CoverageType::Branch)), - ("\"m\"", Ok(CoverageType::Method)), - ("\"method\"", Ok(CoverageType::Method)), - ]; + static CHUNK_FINDER: OnceLock = OnceLock::new(); + let chunk_finder = + CHUNK_FINDER.get_or_init(|| memmem::Finder::new(CHUNKS_FILE_END_OF_CHUNK)); - for test_case in test_cases { - buf.input = test_case.0; - assert_eq!(coverage_type.parse_next(&mut buf), test_case.1); - } - } - - #[test] - fn test_complexity() { - let test_ctx = setup(); - let mut buf = TestStream { - input: "", - state: test_ctx.parse_ctx, + let mut chunk_bytes = if let Some(pos) = chunk_finder.find(self.input) { + let chunk_bytes = &self.input[..pos]; + self.input = &self.input[pos + chunk_finder.needle().len()..]; + chunk_bytes + } else { + mem::take(&mut self.input) }; - let test_cases = [ - ("1", Ok(Complexity::Total(1))), - ("5", Ok(Complexity::Total(5))), - ("1.2345e4", Ok(Complexity::Total(12345))), - ("999999999999999", Ok(Complexity::Total(u32::MAX))), - ( - "[5, 5]", - Ok(Complexity::PathsTaken { - covered: 5, - total: 5, - }), - ), - ( - "[5, 1.2345e4]", - Ok(Complexity::PathsTaken { - covered: 5, - total: 12345, - }), - ), - ( - "[ 5 ,5 ]", // Ignore whitespace - Ok(Complexity::PathsTaken { - covered: 5, - total: 5, - }), - ), - // Malformed inputs - ( - "\"1\"", - Err(backtrack_error_with_contexts(vec![StrContext::Label( - "complexity", - )])), - ), - ( - "[1, 5 5]", - Err(backtrack_error_with_contexts(vec![StrContext::Label( - "complexity", - )])), - ), - ( - "[1, 5, 6]", - Err(backtrack_error_with_contexts(vec![StrContext::Label( - "complexity", - )])), - ), - ( - "\"[1, 5]\"", - Err(backtrack_error_with_contexts(vec![StrContext::Label( - "complexity", - )])), - ), - ( - "[\"1\", \"5\"]", - Err(backtrack_error_with_contexts(vec![StrContext::Label( - "complexity", - )])), - ), - ( - "[1, 5", - Err(backtrack_error_with_contexts(vec![StrContext::Label( - "complexity", - )])), - ), - ( - "[1, ]", - Err(backtrack_error_with_contexts(vec![StrContext::Label( - "complexity", - )])), - ), - ( - "[, 3]", - Err(backtrack_error_with_contexts(vec![StrContext::Label( - "complexity", - )])), - ), - ( - "[1]", - Err(backtrack_error_with_contexts(vec![StrContext::Label( - "complexity", - )])), - ), - ( 
- "one", - Err(backtrack_error_with_contexts(vec![StrContext::Label( - "complexity", - )])), - ), - // TODO: Make these cases error. Currently the parser accepts any number and - // clamps/truncates to u32 range. - ("-3", Ok(Complexity::Total(0))), - ("3.4", Ok(Complexity::Total(3))), - // TODO: Make this case error or clamp to ratios <= 1. - ( - "[2, 1]", - Ok(Complexity::PathsTaken { - covered: 2, - total: 1, - }), - ), - ]; - - for test_case in test_cases { - buf.input = test_case.0; - assert_eq!(complexity.parse_next(&mut buf), test_case.1); + if chunk_bytes == b"null" { + return Ok(Some(Chunk { + chunk_header: ChunkHeader::default(), + input: &[], + })); } - } - - #[test] - fn test_missing_branches() { - let test_ctx = setup(); - let mut buf = TestStream { - input: "", - state: test_ctx.parse_ctx, - }; - let test_cases = [ - ("[]", Ok(vec![])), - ( - "[\"0:jump\"]", - Ok(vec![MissingBranch::Condition(0, Some("jump".to_string()))]), - ), - ( - "[\"0:jump\", \"1\", \"2\"]", - Ok(vec![ - MissingBranch::Condition(0, Some("jump".to_string())), - MissingBranch::Condition(1, None), - MissingBranch::Condition(2, None), - ]), - ), - ( - "[\"26\", \"28\"]", - Ok(vec![MissingBranch::Line(26), MissingBranch::Line(28)]), - ), - ( - "[\"0:0\", \"0:1\", \"1:0\", \"1:1\"]", - Ok(vec![ - MissingBranch::BlockAndBranch(0, 0), - MissingBranch::BlockAndBranch(0, 1), - MissingBranch::BlockAndBranch(1, 0), - MissingBranch::BlockAndBranch(1, 1), - ]), - ), - // Malformed inputs - ( - "[26, 28]", - Err(backtrack_error_with_contexts(vec![StrContext::Label( - "missing_branches", - )])), - ), - ( - "[\"26\", 28]", - Err(backtrack_error_with_contexts(vec![StrContext::Label( - "missing_branches", - )])), - ), - ( - "[0:jump, 28]", - Err(backtrack_error_with_contexts(vec![StrContext::Label( - "missing_branches", - )])), - ), - ( - "\"0:jump\", \"28\"", - Err(backtrack_error_with_contexts(vec![StrContext::Label( - "missing_branches", - )])), - ), - ( - "\"[\"26\", \"28\"]\"", - Err(backtrack_error_with_contexts(vec![StrContext::Label( - "missing_branches", - )])), - ), - ( - "[\"26\", \"28\"", - Err(backtrack_error_with_contexts(vec![StrContext::Label( - "missing_branches", - )])), - ), - ( - // Can't switch types in the middle of a list - "[\"0:jump\", \"0:1\", \"26\"]", - Err(backtrack_error_with_contexts(vec![StrContext::Label( - "missing_branches", - )])), - ), - ( - // Can't switch types in the middle of a list - "[\"0:1\", \"0:jump\", \"26\"]", - Err(backtrack_error_with_contexts(vec![StrContext::Label( - "missing_branches", - )])), - ), - ( - // Can't switch types in the middle of a list - "[\"26\", \"0:jump\", \"0:1\"]", - Err(backtrack_error_with_contexts(vec![StrContext::Label( - "missing_branches", - )])), - ), - ( - // Can't switch types in the middle of a list. 
Actually expected this to pass - // because `"26"` is a valid `Condition` value, but it fails - "[\"26\", \"0:jump\"]", - Err(backtrack_error_with_contexts(vec![StrContext::Label( - "missing_branches", - )])), - ), - ]; + let header_bytes = + next_line(&mut chunk_bytes).ok_or(ChunksFileParseError::UnexpectedInput)?; + let chunk_header: ChunkHeader = serde_json::from_slice(header_bytes) + .map_err(ChunksFileParseError::InvalidFileHeader)?; - for test_case in test_cases { - buf.input = test_case.0; - assert_eq!(missing_branches.parse_next(&mut buf), test_case.1); - } + Ok(Some(Chunk { + chunk_header, + input: chunk_bytes, + })) } +} - #[test] - fn test_partial_spans() { - let test_ctx = setup(); - let mut buf = TestStream { - input: "", - state: test_ctx.parse_ctx, - }; - - let test_cases = [ - ("[]", Ok(vec![])), - ( - "[[null, 10, 1]]", - Ok(vec![Partial { - start_col: None, - end_col: Some(10), - coverage: PyreportCoverage::HitCount(1), - }]), - ), - ( - "[[10, null, 0]]", - Ok(vec![Partial { - start_col: Some(10), - end_col: None, - coverage: PyreportCoverage::HitCount(0), - }]), - ), - ( - "[[null, 10, 1], [10, null, 0]]", - Ok(vec![ - Partial { - start_col: None, - end_col: Some(10), - coverage: PyreportCoverage::HitCount(1), - }, - Partial { - start_col: Some(10), - end_col: None, - coverage: PyreportCoverage::HitCount(0), - }, - ]), - ), - ( - "[[5, 10, 3]]", - Ok(vec![Partial { - start_col: Some(5), - end_col: Some(10), - coverage: PyreportCoverage::HitCount(3), - }]), - ), - // Technically supported, but not expected - ( - "[[null, 10, \"2/2\"]]", - Ok(vec![Partial { - start_col: None, - end_col: Some(10), - coverage: PyreportCoverage::BranchesTaken { - covered: 2, - total: 2, - }, - }]), - ), - ( - "[[null, 10, true]]", - Ok(vec![Partial { - start_col: None, - end_col: Some(10), - coverage: PyreportCoverage::Partial(), - }]), - ), - // Malformed inputs - ( - "[5, 10, 3]", - Err(backtrack_error_with_contexts(vec![StrContext::Label( - "partial_spans", - )])), - ), - ( - "[[5, 10, 3]", - Err(backtrack_error_with_contexts(vec![StrContext::Label( - "partial_spans", - )])), - ), - ( - "[5, 10, 3]]", - Err(backtrack_error_with_contexts(vec![StrContext::Label( - "partial_spans", - )])), - ), - ( - "[[\"5\", \"10\", 3]]", - Err(backtrack_error_with_contexts(vec![StrContext::Label( - "partial_spans", - )])), - ), - ( - "[[\"5\", \"null\", 3]]", - Err(backtrack_error_with_contexts(vec![StrContext::Label( - "partial_spans", - )])), - ), - ( - "[[5, 10, 3, 5]]", - Err(backtrack_error_with_contexts(vec![StrContext::Label( - "partial_spans", - )])), - ), - ( - "[[5, 3]]", - Err(backtrack_error_with_contexts(vec![StrContext::Label( - "partial_spans", - )])), - ), - // TODO: Reject when end_col is smaller than start_col - ( - "[[5, 3, 5]]", - Ok(vec![Partial { - start_col: Some(5), - end_col: Some(3), - coverage: PyreportCoverage::HitCount(5), - }]), - ), - ]; +pub struct Chunk<'d> { + chunk_header: ChunkHeader, + input: &'d [u8], +} - for test_case in test_cases { - buf.input = test_case.0; - assert_eq!(partial_spans.parse_next(&mut buf), test_case.1); - } +impl Chunk<'_> { + pub fn present_sessions(&self) -> &[u32] { + &self.chunk_header.present_sessions } - #[test] - fn test_line_session() { - let test_ctx = setup(); - let mut buf = TestStream { - input: "", - state: test_ctx.parse_ctx, + pub fn next_line(&mut self) -> Result>, ChunksFileParseError> { + let Some(line) = next_line(&mut self.input) else { + return Ok(None); }; - let test_cases = [ - ( - "[0, 1]", - Ok(LineSession { - session_id: 
0, - coverage: PyreportCoverage::HitCount(1), - branches: None, - partials: None, - complexity: None, - }), - ), - ( - "[0, 1, null, null, null]", - Ok(LineSession { - session_id: 0, - coverage: PyreportCoverage::HitCount(1), - branches: Some(None), - partials: Some(None), - complexity: Some(None), - }), - ), - ( - "[0, 1, [\"0:jump\"]]", - Ok(LineSession { - session_id: 0, - coverage: PyreportCoverage::HitCount(1), - branches: Some(Some(vec![MissingBranch::Condition( - 0, - Some("jump".to_string()), - )])), - partials: None, - complexity: None, - }), - ), - ( - "[0, 1, null, [[10, 15, 1]], null]", - Ok(LineSession { - session_id: 0, - coverage: PyreportCoverage::HitCount(1), - branches: Some(None), - partials: Some(Some(vec![Partial { - start_col: Some(10), - end_col: Some(15), - coverage: PyreportCoverage::HitCount(1), - }])), - complexity: Some(None), - }), - ), - ( - "[0, 1, null, null, 3]", - Ok(LineSession { - session_id: 0, - coverage: PyreportCoverage::HitCount(1), - branches: Some(None), - partials: Some(None), - complexity: Some(Some(Complexity::Total(3))), - }), - ), - ( - "[0, 1, null, null, [1, 2]]", - Ok(LineSession { - session_id: 0, - coverage: PyreportCoverage::HitCount(1), - branches: Some(None), - partials: Some(None), - complexity: Some(Some(Complexity::PathsTaken { - covered: 1, - total: 2, - })), - }), - ), - // Malformed inputs - ( - "[0]", - Err(backtrack_error_with_contexts(vec![StrContext::Label( - "line_session", - )])), - ), - ( - "[0, 1", - Err(backtrack_error_with_contexts(vec![StrContext::Label( - "line_session", - )])), - ), - ( - "0, 1]", - Err(backtrack_error_with_contexts(vec![StrContext::Label( - "line_session", - )])), - ), - ( - "[0, null]", - Err(backtrack_error_with_contexts(vec![ - StrContext::Label("coverage"), - StrContext::Label("line_session"), - ])), - ), - ( - "[null, 1]", - Err(backtrack_error_with_contexts(vec![StrContext::Label( - "line_session", - )])), - ), - ( - "[\"0\", 1]", - Err(backtrack_error_with_contexts(vec![StrContext::Label( - "line_session", - )])), - ), - ( - "[0, \"1\"]", - Err(backtrack_error_with_contexts(vec![ - StrContext::Label("coverage"), - StrContext::Label("line_session"), - ])), - ), - ( - "[0, 1, null, null, null, null]", - Err(backtrack_error_with_contexts(vec![StrContext::Label( - "line_session", - )])), - ), - ( - // TODO: Should fail. `partials` must be preceded by `branches` or `null` but it - // isn't here. - "[0, 1, [[10, 15, 1]]]", - Ok(LineSession { - session_id: 0, - coverage: PyreportCoverage::HitCount(1), - branches: None, - partials: Some(Some(vec![Partial { - start_col: Some(10), - end_col: Some(15), - coverage: PyreportCoverage::HitCount(1), - }])), - complexity: None, - }), - ), - ]; - - for test_case in test_cases { - buf.input = test_case.0; - assert_eq!(line_session.parse_next(&mut buf), test_case.1); + if line.is_empty() { + return Ok(Some(None)); } - } - - #[test] - fn test_messages() { - let test_ctx = setup(); - let mut buf = TestStream { - input: "", - state: test_ctx.parse_ctx, - }; - // No idea what `messages` actually is! Guessing it's JSON. 
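The serde rewrite sidesteps that uncertainty instead of resolving it: `LineRecord` below types the messages slot as `Option<IgnoredAnyEq>`, so whatever JSON occupies it is validated and then dropped. A standalone sketch of the same pattern (the `Record` type is hypothetical; the mechanism is serde's `IgnoredAny` plus `#[serde(default)]` for omitted trailing tuple fields, exactly as `LineRecord` uses them):

    use serde::{de::IgnoredAny, Deserialize};

    // The second element may be any JSON value at all, or absent
    // entirely; `#[serde(default)]` covers the omitted case.
    #[derive(Deserialize)]
    struct Record(u32, #[serde(default)] Option<IgnoredAny>);

    fn main() {
        let with_junk: Record =
            serde_json::from_str(r#"[1, {"whatever": []}]"#).unwrap();
        let without: Record = serde_json::from_str("[1]").unwrap();
        assert!(with_junk.1.is_some() && without.1.is_none());
    }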
- let test_cases = [ - ("null", Ok(JsonVal::Null)), - ("{}", Ok(JsonVal::Object(JsonMap::new()))), - ]; - - for test_case in test_cases { - buf.input = test_case.0; - assert_eq!(messages.parse_next(&mut buf), test_case.1); - } + let line_record: LineRecord = + serde_json::from_slice(line).map_err(ChunksFileParseError::InvalidLineRecord)?; + Ok(Some(Some(line_record))) } +} - #[test] - fn test_label() { - let test_ctx = setup(); - let mut buf = TestStream { - input: "", - state: test_ctx.parse_ctx, - }; - - buf.state.labels_index = HashMap::from([ - ("already_inserted".to_string(), 100), - ("1".to_string(), 101), - ]); - - // Parsing a label that is already in `labels_index` should just return it - buf.input = "\"already_inserted\""; - assert_eq!( - label.parse_next(&mut buf), - Ok("already_inserted".to_string()) - ); - - // If we parse a number like `1`, we should look for `"1"` in the labels index. - buf.input = "1"; - assert_eq!(label.parse_next(&mut buf), Ok("1".to_string())); - - // Parsing a label that is not already in `labels_index` should insert it - buf.input = "\"not_already_inserted\""; - assert_eq!( - label.parse_next(&mut buf), - Ok("not_already_inserted".to_string()) - ); - assert_eq!( - buf.state.db.report_builder.report.contexts, - &[Context::new("not_already_inserted")] - ); - - // Malformed labels should never get to inserting - let malformed_test_cases = [ - // Not wrapped in quotes - "already_inserted", - "\"already_inserted", - "already_inserted\"", - "[\"already_inserted\"]", - ]; - - for test_case in malformed_test_cases { - buf.input = test_case; - assert_eq!( - label.parse_next(&mut buf), - Err(backtrack_error_with_contexts(vec![StrContext::Label( - "label" - )])) - ); - } +fn next_line<'d>(input: &mut &'d [u8]) -> Option<&'d [u8]> { + if input.is_empty() { + return None; } - #[test] - fn test_coverage_datapoint() { - let test_ctx = setup(); - let mut buf = TestStream { - input: "", - state: test_ctx.parse_ctx, - }; + let line_bytes = if let Some(pos) = memchr(b'\n', input) { + let line_bytes = &input[..pos]; + *input = &input[pos + 1..]; + line_bytes + } else { + mem::take(input) + }; + Some(line_bytes) +} - let valid_test_cases = [ - ( - "[1, \"2/2\", \"b\", [\"test_case\"]]", - Ok(( - 1, - CoverageDatapoint { - session_id: 1, - _coverage: PyreportCoverage::BranchesTaken { - covered: 2, - total: 2, - }, - _coverage_type: Some(CoverageType::Branch), - labels: vec!["test_case".to_string()], - }, - )), - ), - ( - "[1, 2, null, []]", - Ok(( - 1, - CoverageDatapoint { - session_id: 1, - _coverage: PyreportCoverage::HitCount(2), - _coverage_type: Some(CoverageType::Line), - labels: vec![], - }, - )), - ), - ( - "[3, true, null, [1, 2, 3]]", - Ok(( - 3, - CoverageDatapoint { - session_id: 3, - _coverage: PyreportCoverage::Partial(), - _coverage_type: Some(CoverageType::Line), - labels: vec!["1".to_string(), "2".to_string(), "3".to_string()], - }, - )), - ), - ]; +#[derive(Debug, PartialEq, Eq, Default, Deserialize)] +pub struct FileHeader { + #[serde(default)] + pub labels_index: HashMap, +} - assert!(buf.state.labels_index.is_empty()); - for test_case in valid_test_cases { - buf.input = test_case.0; - assert_eq!(coverage_datapoint.parse_next(&mut buf), test_case.1); - } - assert_eq!(buf.state.labels_index.len(), 4); - assert!(buf.state.labels_index.contains_key("test_case")); - assert!(buf.state.labels_index.contains_key("1")); - assert!(buf.state.labels_index.contains_key("2")); - assert!(buf.state.labels_index.contains_key("3")); +#[derive(Debug, PartialEq, Eq, 
Default, Deserialize)] +pub struct ChunkHeader { + #[serde(default)] + pub present_sessions: Vec, +} - let invalid_test_cases = [ - ( - "[]", - Err(backtrack_error_with_contexts(vec![StrContext::Label( - "coverage_datapoint", - )])), - ), - ( - "[1, 2]", - Err(backtrack_error_with_contexts(vec![StrContext::Label( - "coverage_datapoint", - )])), - ), - ( - "[1, 2, \"b\"]", - Err(backtrack_error_with_contexts(vec![StrContext::Label( - "coverage_datapoint", - )])), - ), - ( - "[1, 2, \"b\", []", - Err(backtrack_error_with_contexts(vec![StrContext::Label( - "coverage_datapoint", - )])), - ), - ( - "", - Err(backtrack_error_with_contexts(vec![StrContext::Label( - "coverage_datapoint", - )])), - ), - ( - "[1, 2, null, []", - Err(backtrack_error_with_contexts(vec![StrContext::Label( - "coverage_datapoint", - )])), - ), - ( - "1, 2, null, []]", - Err(backtrack_error_with_contexts(vec![StrContext::Label( - "coverage_datapoint", - )])), - ), - ( - "[1, 2, null, [test_case, test_case_2]", - Err(backtrack_error_with_contexts(vec![StrContext::Label( - "coverage_datapoint", - )])), - ), - ]; - for test_case in invalid_test_cases { - buf.input = test_case.0; - assert_eq!(coverage_datapoint.parse_next(&mut buf), test_case.1); +#[derive(Debug, Clone, Deserialize)] +struct IgnoredAnyEq(IgnoredAny); +impl PartialEq for IgnoredAnyEq { + fn eq(&self, _other: &Self) -> bool { + true + } +} +impl Eq for IgnoredAnyEq {} + +#[derive(Debug, Clone, PartialEq, Eq, Deserialize)] +pub struct LineRecord( + /// coverage + PyreportCoverage, + /// coverage type + Option, + /// sessions + Vec, + /// messages + #[serde(default)] + Option, + /// complexity + #[serde(default)] + Option, + /// datapoints + #[serde(default)] + Option>, +); + +#[derive(Debug, Clone, PartialEq, Eq, Deserialize)] +pub struct LineSession( + /// session id + usize, + /// coverage + PyreportCoverage, + /// branches + #[serde(default)] + Option>, + /// partials + #[serde(default)] + Option>, + /// TODO: complexity + #[serde(default)] + Option, +); + +#[derive(Debug, Clone, PartialEq, Eq, Deserialize)] +pub struct CoverageDatapoint( + /// session id + u32, + /// coverage + PyreportCoverage, + /// coverage type + #[serde(default)] + Option, + /// labels + #[serde(default)] + Option>, +); + +impl From for types::CoverageDatapoint { + fn from(datapoint: CoverageDatapoint) -> Self { + Self { + session_id: datapoint.0, + _coverage: datapoint.1, + _coverage_type: datapoint.2, + labels: datapoint.3.unwrap_or_default(), } } +} - #[test] - fn test_report_line() { - let test_ctx = setup(); - let mut buf = TestStream { - input: "", - state: test_ctx.parse_ctx, - }; - buf.state.labels_index.insert("test_case".to_string(), 100); - - let test_cases = [ - ( - "[1, null, [[0, 1]]]", - Ok(ReportLine { - line_no: 0, - coverage: PyreportCoverage::HitCount(1), - coverage_type: CoverageType::Line, - sessions: vec![LineSession { - session_id: 0, - coverage: PyreportCoverage::HitCount(1), - branches: None, - partials: None, - complexity: None, - }], - _messages: None, - _complexity: None, - datapoints: None, - }), - ), - ( - "[1, null, [[0, 1], [1, 1]]]", - Ok(ReportLine { - line_no: 0, - coverage: PyreportCoverage::HitCount(1), - coverage_type: CoverageType::Line, - sessions: vec![ - LineSession { - session_id: 0, - coverage: PyreportCoverage::HitCount(1), - branches: None, - partials: None, - complexity: None, - }, - LineSession { - session_id: 1, - coverage: PyreportCoverage::HitCount(1), - branches: None, - partials: None, - complexity: None, - }, - ], - _messages: None, 
- _complexity: None, - datapoints: None, - }), - ), - ( - "[1, null, [[0, 1]], null, 3]", - Ok(ReportLine { - line_no: 0, - coverage: PyreportCoverage::HitCount(1), - coverage_type: CoverageType::Line, - sessions: vec![LineSession { - session_id: 0, - coverage: PyreportCoverage::HitCount(1), - branches: None, - partials: None, - complexity: None, - }], - _messages: Some(Some(JsonVal::Null)), - _complexity: Some(Some(Complexity::Total(3))), - datapoints: None, - }), - ), - ( - "[1, null, [[0, 1]], null, null, []]", - Ok(ReportLine { - line_no: 0, - coverage: PyreportCoverage::HitCount(1), - coverage_type: CoverageType::Line, - sessions: vec![LineSession { - session_id: 0, - coverage: PyreportCoverage::HitCount(1), - branches: None, - partials: None, - complexity: None, - }], - _messages: Some(Some(JsonVal::Null)), - _complexity: Some(None), - datapoints: Some(Some(HashMap::new())), - }), - ), - ( - "[1, null, [[0, 1]], null, null, [[0, 1, null, [\"test_case\"]]]]", - Ok(ReportLine { - line_no: 0, - coverage: PyreportCoverage::HitCount(1), - coverage_type: CoverageType::Line, - sessions: vec![LineSession { - session_id: 0, - coverage: PyreportCoverage::HitCount(1), - branches: None, - partials: None, - complexity: None, - }], - _messages: Some(Some(JsonVal::Null)), - _complexity: Some(None), - datapoints: Some(Some(HashMap::from([( - 0, - CoverageDatapoint { - session_id: 0, - _coverage: PyreportCoverage::HitCount(1), - _coverage_type: Some(CoverageType::Line), - labels: vec!["test_case".to_string()], - }, - )]))), - }), - ), - ( - "[\"2/2\", \"b\", [[0, \"2/2\"]], null, null, [[0, \"2/2\", \"b\", [\"test_case\"]]]]", - Ok(ReportLine { - line_no: 0, - coverage: PyreportCoverage::BranchesTaken{covered: 2, total: 2}, - coverage_type: CoverageType::Branch, - sessions: vec![LineSession { - session_id: 0, - coverage: PyreportCoverage::BranchesTaken{covered: 2, total: 2}, - branches: None, - partials: None, - complexity: None, - }], - _messages: Some(Some(JsonVal::Null)), - _complexity: Some(None), - datapoints: Some(Some(HashMap::from([( - 0, - CoverageDatapoint { - session_id: 0, - _coverage: PyreportCoverage::BranchesTaken{covered: 2, total: 2}, - _coverage_type: Some(CoverageType::Branch), - labels: vec!["test_case".to_string()], - }, - )]))), - }), - ), - ( - "[1, \"m\", [[0, 1]], null, null, [[0, 1, \"m\", [\"test_case\"]]]]", - Ok(ReportLine { - line_no: 0, - coverage: PyreportCoverage::HitCount(1), - coverage_type: CoverageType::Method, - sessions: vec![LineSession { - session_id: 0, - coverage: PyreportCoverage::HitCount(1), - branches: None, - partials: None, - complexity: None, - }], - _messages: Some(Some(JsonVal::Null)), - _complexity: Some(None), - datapoints: Some(Some(HashMap::from([( - 0, - CoverageDatapoint { - session_id: 0, - _coverage: PyreportCoverage::HitCount(1), - _coverage_type: Some(CoverageType::Method), - labels: vec!["test_case".to_string()], - }, - )]))), - }), - ), - // Malformed inputs - ( - // Unquoted coverage type - "[1, \"m\", [[0, 1]], null, null, [[0, 1, m, [\"test_case\"]]]]", - Err(backtrack_error_with_contexts(vec![StrContext::Label("report_line")])), - ), - ( - // Quoted coverage field - "[\"1\", \"m\", [[0, 1]], null, null, [[0, 1, \"m\", [\"test_case\"]]]]", - Err(backtrack_error_with_contexts(vec![StrContext::Label("coverage"), StrContext::Label("report_line")])), - ), - ( - // Missing closing brace - "[1, \"m\", [[0, 1]], null, null, [[0, 1, \"m\", [\"test_case\"]]]", - Err(backtrack_error_with_contexts(vec![StrContext::Label("report_line")])), - ), - 
( - // Trailing comma - "[1, \"m\", [[0, 1]], null, null,]", - Err(backtrack_error_with_contexts(vec![StrContext::Label("report_line")])), - ), - ( - // Missing `sessions` - "[1, \"m\"]", - Err(backtrack_error_with_contexts(vec![StrContext::Label("report_line")])), - ), - ]; +impl<'s> TryFrom<&'s str> for CoverageType { + type Error = &'s str; - for test_case in test_cases { - println!("testing {:?}", test_case.0); - buf.input = test_case.0; - assert_eq!(report_line.parse_next(&mut buf), test_case.1); + fn try_from(value: &'s str) -> Result { + match value { + "line" => Ok(Self::Line), + "b" | "branch" => Ok(Self::Branch), + "m" | "method" => Ok(Self::Method), + s => Err(s), } } +} - /* TODO - #[test] - fn test_report_line_or_empty() { - let test_ctx = setup(); - let mut buf = TestStream { - input: "", - state: test_ctx.parse_ctx, - }; - - buf.state.labels_index.insert("test_case".to_string(), 100); - stub_report_builder(&mut buf.state.db.report_builder); - - let valid_test_cases = [ - // Test that empty lines will still advance the `current_line` state - ("\n", Ok(None)), - ("\n", Ok(None)), - ("\n", Ok(None)), - ("[1, null, [[0, 1]]]", - Ok(Some(ReportLine { - line_no: 4, - coverage: PyreportCoverage::HitCount(1), - coverage_type: CoverageType::Line, - sessions: vec![LineSession { - session_id: 0, - coverage: PyreportCoverage::HitCount(1), - branches: None, - partials: None, - complexity: None, - }], - _messages: None, - _complexity: None, - datapoints: None, - })), - ), - ("[1, null, [[0, 1]], null, 3]", - Ok(Some(ReportLine { - line_no: 5, - coverage: PyreportCoverage::HitCount(1), - coverage_type: CoverageType::Line, - sessions: vec![LineSession { - session_id: 0, - coverage: PyreportCoverage::HitCount(1), - branches: None, - partials: None, - complexity: None, - }], - _messages: Some(Some(JsonVal::Null)), - _complexity: Some(Some(Complexity::Total(3))), - datapoints: None, - })), - ), - ("[\"2/2\", \"b\", [[0, \"2/2\"]], null, null, [[0, \"2/2\", \"b\", [\"test_case\"]]]]", - Ok(Some(ReportLine { - line_no: 6, - coverage: PyreportCoverage::BranchesTaken{covered: 2, total: 2}, - coverage_type: CoverageType::Branch, - sessions: vec![LineSession { - session_id: 0, - coverage: PyreportCoverage::BranchesTaken{covered: 2, total: 2}, - branches: None, - partials: None, - complexity: None, - }], - _messages: Some(Some(JsonVal::Null)), - _complexity: Some(None), - datapoints: Some(Some(HashMap::from([( - 0, - CoverageDatapoint { - session_id: 0, - _coverage: PyreportCoverage::BranchesTaken{covered: 2, total: 2}, - _coverage_type: Some(CoverageType::Branch), - labels: vec!["test_case".to_string()], - }, - )]))), - })), - ), - ("\n", Ok(None)), - // The last line in the entire chunks file ends in EOF, not \n - ("", Ok(None)), - // `CHUNKS_FILE_END_OF_CHUNK` begins with a `\n` so we know the current line is empty - (CHUNKS_FILE_END_OF_CHUNK, Ok(None)), - ]; - let expected_line_count = valid_test_cases.len(); - - assert_eq!(buf.state.chunk.current_line, 0); - for test_case in valid_test_cases { - buf.input = test_case.0; - assert_eq!(report_line_or_empty.parse_next(&mut buf), test_case.1); +impl<'de> Deserialize<'de> for PyreportCoverage { + fn deserialize(deserializer: D) -> Result + where + D: de::Deserializer<'de>, + { + struct CoverageVisitor; + impl de::Visitor<'_> for CoverageVisitor { + type Value = PyreportCoverage; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("a coverage value") + } + + fn visit_bool(self, v: bool) -> Result + where + E: 
de::Error, + { + if v { + Ok(PyreportCoverage::Partial()) + } else { + Err(de::Error::invalid_value(de::Unexpected::Bool(v), &self)) + } + } + + fn visit_u64(self, value: u64) -> Result + where + E: de::Error, + { + Ok(PyreportCoverage::HitCount(value as u32)) + } + + fn visit_str(self, v: &str) -> Result + where + E: de::Error, + { + let invalid = || de::Error::invalid_value(de::Unexpected::Str(v), &self); + let (covered, total) = v.split_once('/').ok_or_else(invalid)?; + + let covered: u32 = covered.parse().map_err(|_| invalid())?; + let total: u32 = total.parse().map_err(|_| invalid())?; + Ok(PyreportCoverage::BranchesTaken { covered, total }) + } } - assert_eq!(buf.state.chunk.current_line as usize, expected_line_count); - buf.state.chunk.current_line = 0; - let invalid_test_cases = [ - ( - // Quoted coverage field - "[\"1\", \"m\", [[0, 1]], null, null, [[0, 1, \"m\", [\"test_case\"]]]]", - Err(backtrack_error_with_contexts(vec![])), - ), - ( - // Missing closing brace - "[1, \"m\", [[0, 1]], null, null, [[0, 1, \"m\", [\"test_case\"]]]", - Err(backtrack_error_with_contexts(vec![])), - ), - ( - // Trailing comma - "[1, \"m\", [[0, 1]], null, null,]", - Err(backtrack_error_with_contexts(vec![])), - ), - ]; - let expected_line_count = invalid_test_cases.len(); - for test_case in invalid_test_cases { - buf.input = test_case.0; - assert_eq!(report_line_or_empty.parse_next(&mut buf), test_case.1); - } - // We still increment the line number even for malformed lines so that we don't - // throw off subsequent lines that are well-formed. - assert_eq!(buf.state.chunk.current_line as usize, expected_line_count); + deserializer.deserialize_any(CoverageVisitor) } - */ - - #[test] - fn test_chunk_header() { - let test_ctx = setup(); - let mut buf = TestStream { - input: "", - state: test_ctx.parse_ctx, - }; - - let test_cases = [ - ("{}\n", Ok(JsonMap::new())), - ( - "{\"present_sessions\": []}\n", - Ok(JsonMap::from_iter([( - "present_sessions".to_string(), - JsonVal::Array(vec![]), - )])), - ), - // Missing newline - ( - "{}", - Err(backtrack_error_with_contexts(vec![StrContext::Label( - "chunk_header", - )])), - ), - // Missing dict and newline - ( - "", - Err(backtrack_error_with_contexts(vec![StrContext::Label( - "chunk_header", - )])), - ), - // Missing dict - ( - "\n", - Err(backtrack_error_with_contexts(vec![StrContext::Label( - "chunk_header", - )])), - ), - ( - "present_sessions: []", - Err(backtrack_error_with_contexts(vec![StrContext::Label( - "chunk_header", - )])), - ), - ]; +} - for test_case in test_cases { - buf.input = test_case.0; - assert_eq!(chunk_header.parse_next(&mut buf), test_case.1); +impl<'de> Deserialize<'de> for MissingBranch { + fn deserialize(deserializer: D) -> Result + where + D: de::Deserializer<'de>, + { + struct MissingBranchVisitor; + impl de::Visitor<'_> for MissingBranchVisitor { + type Value = MissingBranch; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("a missing branch value") + } + + fn visit_str(self, v: &str) -> Result + where + E: de::Error, + { + let invalid = || de::Error::invalid_value(de::Unexpected::Str(v), &self); + + if let Some((block, branch)) = v.split_once(":") { + let block: u32 = block.parse().map_err(|_| invalid())?; + let branch: u32 = branch.parse().map_err(|_| invalid())?; + + return Ok(MissingBranch::BlockAndBranch(block, branch)); + } + + if let Some(condition) = v.strip_suffix(":jump") { + let condition: u32 = condition.parse().map_err(|_| invalid())?; + + // TODO(swatinem): can we 
skip saving the `jump` here? + return Ok(MissingBranch::Condition(condition, Some("jump".into()))); + } + + let line: u32 = v.parse().map_err(|_| invalid())?; + Ok(MissingBranch::Line(line)) + } } - } - - #[test] - fn test_chunk() { - let test_ctx = setup(); - let mut buf = TestStream { - input: "", - state: test_ctx.parse_ctx, - }; - - // (input, (result, expected_line_count)) - let test_cases = [ - // We consume `{}\n` to parse the header, leaving the stream empty. - // `report_line_or_empty` recognizes this as an empty line terminated by EOF, so it - // succeeds. - ("{}\n", (Ok(()), 1)), - // Similar to the above. `{}\n` is the header, then one empty line terminated by `\n`, - // and then a second empty line terminated by EOF. - ("{}\n\n", (Ok(()), 2)), - // No trailing newlines. There is a single line of data following the header, and then - // that's it. - ("{}\n[1, null, [[0, 1]]]", (Ok(()), 1)), - ( - // `{}\n` is the header, then we have two lines of data delimited by `\n` - "{}\n[1, null, [[0, 1]]]\n[0, null, [[0, 1]]]", - (Ok(()), 2), - ), - ( - // Same as above, but the trailing newline represents an extra empty line - "{}\n[1, null, [[0, 1]]]\n[0, null, [[0, 1]]]\n", - (Ok(()), 3), - ), - ( - // Same as above, but the trailing newline represents an extra empty line - "{}\n[1, null, [[0, 1]]]\n\n\n[0, null, [[0, 1]]]\n\n", - (Ok(()), 6), - ), - ( - // One line of data followed by the "end of chunk" delimiter. We don't consider the - // delimiter to be a line, but attempting to parse it as one still increments the - // line count. - "{}\n[1, null, [[0, 1]]]\n<<<<< end_of_chunk >>>>>\n\n", - (Ok(()), 2), - ), - // Chunks can have no header or report lines and only the text "null" - ("null\n", (Ok(()), 0)), - // Malformed - // Missing newline after header - ( - "{}", - ( - Err(cut_error_with_contexts(vec![ - StrContext::Label("chunk_header"), - StrContext::Label("chunk"), - ])), - 0, - ), - ), - // Missing header - ( - "\n\n", - ( - Err(cut_error_with_contexts(vec![ - StrContext::Label("chunk_header"), - StrContext::Label("chunk"), - ])), - 0, - ), - ), - ( - // Malformed report line. Attempting the parse still increments the line count. 
- "{}\n[1, null, [[0, 1]]\n\n", - ( - Err(cut_error_with_contexts(vec![ - StrContext::Label("report_line_or_empty"), - StrContext::Label("chunk"), - ])), - 1, - ), - ), - ( - // Malformed header - "{[]}\n\n", - ( - Err(cut_error_with_contexts(vec![ - StrContext::Label("chunk_header"), - StrContext::Label("chunk"), - ])), - 0, - ), - ), - ]; - for test_case in test_cases { - buf.state.chunk.index = 0; - buf.input = test_case.0; - let expected = test_case.1; - assert_eq!(chunk.parse_next(&mut buf), expected.0); - assert_eq!(buf.state.chunk.current_line, expected.1); - } + deserializer.deserialize_any(MissingBranchVisitor) } +} + +#[cfg(test)] +mod tests { + use super::*; #[test] - fn test_chunks_file_header() { - let test_ctx = setup(); - let mut buf = TestStream { - input: "", - state: test_ctx.parse_ctx, - }; + fn test_parsing_chunks() { + let simple_line_record = LineRecord( + PyreportCoverage::HitCount(1), + None, + vec![LineSession( + 0, + PyreportCoverage::HitCount(1), + None, + None, + None, + )], + None, + None, + None, + ); - assert!(buf.state.labels_index.is_empty()); - let test_cases = [ - ( - "{\"labels_index\": {\"1\": \"test_name\"}}\n<<<<< end_of_header >>>>>\n", - Ok(()), - ), - ("{\"labels_index\": {\"test_name\": \"test_name\"}}\n<<<<< end_of_header >>>>>\n", Ok(())), + #[allow(clippy::type_complexity)] + let cases: &[( + &[u8], // input + &[&[Option]], // chunks: line records + )] = &[ ( - // This unrecognized key is just ignored - "{\"not_labels_index\": {\"test_name_2\": \"test_name_2\"}}\n<<<<< end_of_header >>>>>\n", - Ok(()), + // Header and one chunk with an empty line + b"{}\n<<<<< end_of_header >>>>>\n{}\n", + &[&[]], ), - ("{", Err(backtrack_error_with_contexts(vec![StrContext::Label("chunks_file_header")]))), - ("", Err(backtrack_error_with_contexts(vec![StrContext::Label("chunks_file_header")]))), ( - // Missing terminator - "{\"labels_index\": {\"1\": \"test_name\"}}", - Err(backtrack_error_with_contexts(vec![StrContext::Label("chunks_file_header")])), + // No header, one chunk with a populated line and an empty line + b"{}\n[1, null, [[0, 1]]]\n", + &[&[Some(simple_line_record.clone())]], ), - ( - // Missing newline before terminator - "{\"labels_index\": {\"1\": \"test_name\"}}<<<<< end_of_header >>>>>\n", - Err(backtrack_error_with_contexts(vec![StrContext::Label("chunks_file_header")])), - ), - ]; - - for test_case in test_cases { - buf.input = test_case.0; - assert_eq!(chunks_file_header.parse_next(&mut buf), test_case.1); - } - assert_eq!(buf.state.labels_index.len(), 2); - assert!(buf.state.labels_index.contains_key("1")); - assert!(buf.state.labels_index.contains_key("test_name")); - - let report = buf.state.db.report_builder.build().unwrap(); - assert_eq!( - report.contexts, - &[Context::new("test_name"), Context::new("test_name")] - ); - } - - #[test] - fn test_parse_chunks_file() { - let test_ctx = setup(); - let mut buf = TestStream { - input: "", - state: test_ctx.parse_ctx, - }; - buf.state.labels_index.insert("test_case".to_string(), 100); - - // (input, (result, expected_chunk_index, expected_line_count)) - let test_cases = [ - // Header and one chunk with an empty line - ("{}\n<<<<< end_of_header >>>>>\n{}\n", (Ok(()), 1, 1)), - // No header, one chunk with a populated line and an empty line - ("{}\n[1, null, [[0, 1]]]\n", (Ok(()), 1, 2)), ( // No header, two chunks, the second having just one empty line - "{}\n[1, null, [[0, 1]]]\n\n<<<<< end_of_chunk >>>>>\n{}\n", - (Ok(()), 2, 1), + b"{}\n[1, null, [[0, 1]]]\n\n<<<<< end_of_chunk 
>>>>>\n{}\n", + &[&[Some(simple_line_record.clone())], &[]], ), ( // Header, two chunks, the second having multiple data lines and an empty line - "{}\n<<<<< end_of_header >>>>>\n{}\n[1, null, [[0, 1]]]\n\n<<<<< end_of_chunk >>>>>\n{}\n[1, null, [[0, 1]]]\n[1, null, [[0, 1]]]\n", - (Ok(()), 2, 3), - ), - // Malformed - ( - // Header but 0 chunks - "{}\n<<<<< end_of_header >>>>>\n\n", - (Err(cut_error_with_contexts(vec![StrContext::Label("chunk_header"), StrContext::Label("chunk"), StrContext::Label("parse_chunks_file")])), 0, 0), - ), - // No header (fine) but 0 chunks - ("", (Err(cut_error_with_contexts(vec![StrContext::Label("chunk_header"), StrContext::Label("chunk"), StrContext::Label("parse_chunks_file")])), 0, 0)), - ( - // Malformed report line. Attempting the line parse still increments the line count. - "{}\n[1, null, [[0, 1]]\n<<<<< end_of_chunk >>>>>\n{}\n\n", - (Err(cut_error_with_contexts(vec![StrContext::Label("report_line_or_empty"), StrContext::Label("chunk"), StrContext::Label("parse_chunks_file")])), 0, 1) + b"{}\n<<<<< end_of_header >>>>>\n{}\n[1, null, [[0, 1]]]\n\n<<<<< end_of_chunk >>>>>\n{}\n[1, null, [[0, 1]]]\n[1, null, [[0, 1]]]\n", + &[ + &[Some(simple_line_record.clone())], + &[ + Some(simple_line_record.clone()), + Some(simple_line_record.clone()), + ], + ], ), ]; - for test_case in test_cases { - buf.state.chunk.index = 0; - buf.state.chunk.current_line = 0; - buf.input = test_case.0; - let expected_result = test_case.1; - assert_eq!(parse_chunks_file.parse_next(&mut buf), expected_result.0); - assert_eq!(buf.state.chunk.index, expected_result.1); - assert_eq!(buf.state.chunk.current_line, expected_result.2); + for (input, expected_chunks) in cases { + let chunks_file = ChunksFile::new(input).unwrap(); + let mut chunks = chunks_file.chunks(); + + for expected_line_records in *expected_chunks { + let mut chunk = chunks.next_chunk().unwrap().unwrap(); + + let mut lines = vec![]; + while let Some(line) = chunk.next_line().unwrap() { + lines.push(line); + } + + assert_eq!(lines, *expected_line_records); + } + assert!(chunks.next_chunk().unwrap().is_none()); } } } diff --git a/core/src/parsers/pyreport/chunks_serde.rs b/core/src/parsers/pyreport/chunks_serde.rs deleted file mode 100644 index af887ae..0000000 --- a/core/src/parsers/pyreport/chunks_serde.rs +++ /dev/null @@ -1,527 +0,0 @@ -//! A parser for the `chunks` file format. -//! -//! A chunks file contains an optional header and a series of 1 or more -//! "chunks", separated by an `END_OF_CHUNK` terminator. -//! -//! Chunks files sometimes begin with a JSON object followed by an -//! `END_OF_HEADER` terminator string. -//! The JSON object contains: -//! - `"labels_index"`: assigns a numeric ID to each label to save space -//! -//! If the `"labels_index"` key is present, this parser will insert each label -//! into the report as a [`Context`](models::Context) and create a mapping -//! in `buf.state.labels_index` from numeric ID in the header to the -//! new [`Context`](models::Context)'s ID in the output report. If the -//! `"labels_index"` key is _not_ present, we will populate -//! `buf.state.labels_index` gradually as we encounter new labels during -//! parsing. -//! -//! A chunk contains all of the line-by-line measurements for -//! a file. The Nth chunk corresponds to the file whose entry in -//! `buf.state.report_json_files` has N in its `chunks_index` field. -//! -//! Each new chunk will reset `buf.state.chunk.current_line` to 0 when it starts -//! 
and increment `buf.state.chunk.index` when it ends so that the next chunk -//! can associate its data with the correct file. -//! -//! A line may be empty, or it may contain a [`LineRecord`]. -//! A [`LineRecord`] itself does not correspond to anything in the output, -//! but it's an umbrella that includes all of the data -//! tied to a line/[`CoverageSample`](models::CoverageSample). -//! -//! This parser performs all the writes it can to the output -//! stream and only returns a [`ReportLine`] for tests. The -//! `report_line_or_empty` parser which wraps this and supports empty lines -//! returns `Ok(())`. - -use std::{collections::HashMap, fmt, mem, sync::OnceLock}; - -use memchr::{memchr, memmem}; -use serde::{de, de::IgnoredAny, Deserialize}; - -use super::{chunks::ParseCtx, report_json::ParsedReportJson, utils}; -use crate::{ - error::CodecovError, - report::{ - pyreport::{ - types::{self, CoverageType, MissingBranch, Partial, PyreportCoverage, ReportLine}, - CHUNKS_FILE_END_OF_CHUNK, CHUNKS_FILE_HEADER_TERMINATOR, - }, - Report, ReportBuilder, - }, -}; - -pub fn parse_chunks_file( - input: &[u8], - report_json: ParsedReportJson, - mut builder: B, -) -> Result<(), CodecovError> -where - B: ReportBuilder, - R: Report, -{ - let chunks_file = ChunksFile::new(input)?; - - let mut labels_index = HashMap::with_capacity(chunks_file.labels_index().len()); - for (index, name) in chunks_file.labels_index() { - let context = builder.insert_context(name)?; - labels_index.insert(index.clone(), context.id); - } - - let mut ctx = ParseCtx::new(builder, report_json.files, report_json.sessions); - ctx.labels_index = labels_index; - - let mut report_lines = vec![]; - - let mut chunks = chunks_file.chunks(); - let mut chunk_no = 0; - while let Some(mut chunk) = chunks.next_chunk()? { - let mut line_no = 0; - report_lines.clear(); - while let Some(line) = chunk.next_line()? 
{ - line_no += 1; - if let Some(line) = line { - let sessions = line - .2 - .into_iter() - .map(|session| types::LineSession { - session_id: session.0, - coverage: session.1, - branches: session.2.into(), - partials: session.3.into(), - complexity: None, // TODO - }) - .collect(); - - let datapoints: Option> = line.5.map(|dps| { - dps.into_iter() - .map(|dp| (dp.0, types::CoverageDatapoint::from(dp))) - .collect() - }); - - if let Some(datapoints) = &datapoints { - for datapoint in datapoints.values() { - for label in &datapoint.labels { - if !ctx.labels_index.contains_key(label) { - let context = ctx.db.report_builder.insert_context(label)?; - ctx.labels_index.insert(label.into(), context.id); - } - } - } - } - - let mut report_line = ReportLine { - line_no, - coverage: line.0, - coverage_type: line.1.unwrap_or_default(), - sessions, - _messages: None, - _complexity: None, - datapoints: Some(datapoints), - }; - report_line.normalize(); - report_lines.push(report_line); - } - } - - ctx.chunk.index = chunk_no; - utils::save_report_lines(&report_lines, &mut ctx)?; - chunk_no += 1; - } - - Ok(()) -} - -#[derive(Debug, thiserror::Error)] -pub enum ChunksFileParseError { - #[error("unexpected EOF")] - UnexpectedEof, - #[error("unexpected input")] - UnexpectedInput, - #[error("invalid file header")] - InvalidFileHeader(#[source] serde_json::Error), - #[error("invalid chunk header")] - InvalidChunkHeader(#[source] serde_json::Error), - #[error("invalid line record")] - InvalidLineRecord(#[source] serde_json::Error), -} - -impl PartialEq for ChunksFileParseError { - fn eq(&self, other: &Self) -> bool { - core::mem::discriminant(self) == core::mem::discriminant(other) - } -} -impl Eq for ChunksFileParseError {} - -#[derive(Debug)] -pub struct ChunksFile<'d> { - file_header: FileHeader, - input: &'d [u8], -} - -impl<'d> ChunksFile<'d> { - pub fn new(mut input: &'d [u8]) -> Result { - static HEADER_FINDER: OnceLock = OnceLock::new(); - let header_finder = - HEADER_FINDER.get_or_init(|| memmem::Finder::new(CHUNKS_FILE_HEADER_TERMINATOR)); - - let file_header = if let Some(pos) = header_finder.find(input) { - let header_bytes = &input[..pos]; - input = &input[pos + header_finder.needle().len()..]; - let file_header: FileHeader = serde_json::from_slice(header_bytes) - .map_err(ChunksFileParseError::InvalidFileHeader)?; - file_header - } else { - FileHeader::default() - }; - - Ok(Self { file_header, input }) - } - - pub fn labels_index(&self) -> &HashMap { - &self.file_header.labels_index - } - - pub fn chunks(&self) -> Chunks { - Chunks { input: self.input } - } -} - -pub struct Chunks<'d> { - input: &'d [u8], -} - -impl<'d> Chunks<'d> { - pub fn next_chunk(&mut self) -> Result>, ChunksFileParseError> { - if self.input.is_empty() { - return Ok(None); - } - - static CHUNK_FINDER: OnceLock = OnceLock::new(); - let chunk_finder = - CHUNK_FINDER.get_or_init(|| memmem::Finder::new(CHUNKS_FILE_END_OF_CHUNK)); - - let mut chunk_bytes = if let Some(pos) = chunk_finder.find(self.input) { - let chunk_bytes = &self.input[..pos]; - self.input = &self.input[pos + chunk_finder.needle().len()..]; - chunk_bytes - } else { - mem::take(&mut self.input) - }; - - if chunk_bytes == b"null" { - return Ok(Some(Chunk { - chunk_header: ChunkHeader::default(), - input: &[], - })); - } - - let header_bytes = - next_line(&mut chunk_bytes).ok_or(ChunksFileParseError::UnexpectedInput)?; - let chunk_header: ChunkHeader = serde_json::from_slice(header_bytes) - .map_err(ChunksFileParseError::InvalidFileHeader)?; - - Ok(Some(Chunk { - 
chunk_header, - input: chunk_bytes, - })) - } -} - -pub struct Chunk<'d> { - chunk_header: ChunkHeader, - input: &'d [u8], -} - -impl Chunk<'_> { - pub fn present_sessions(&self) -> &[u32] { - &self.chunk_header.present_sessions - } - - pub fn next_line(&mut self) -> Result>, ChunksFileParseError> { - let Some(line) = next_line(&mut self.input) else { - return Ok(None); - }; - - if line.is_empty() { - return Ok(Some(None)); - } - - let line_record: LineRecord = - serde_json::from_slice(line).map_err(ChunksFileParseError::InvalidLineRecord)?; - Ok(Some(Some(line_record))) - } -} - -fn next_line<'d>(input: &mut &'d [u8]) -> Option<&'d [u8]> { - if input.is_empty() { - return None; - } - - let line_bytes = if let Some(pos) = memchr(b'\n', input) { - let line_bytes = &input[..pos]; - *input = &input[pos + 1..]; - line_bytes - } else { - mem::take(input) - }; - Some(line_bytes) -} - -#[derive(Debug, PartialEq, Eq, Default, Deserialize)] -pub struct FileHeader { - #[serde(default)] - pub labels_index: HashMap, -} - -#[derive(Debug, PartialEq, Eq, Default, Deserialize)] -pub struct ChunkHeader { - #[serde(default)] - pub present_sessions: Vec, -} - -#[derive(Debug, Clone, Deserialize)] -struct IgnoredAnyEq(IgnoredAny); -impl PartialEq for IgnoredAnyEq { - fn eq(&self, _other: &Self) -> bool { - true - } -} -impl Eq for IgnoredAnyEq {} - -#[derive(Debug, Clone, PartialEq, Eq, Deserialize)] -pub struct LineRecord( - /// coverage - PyreportCoverage, - /// coverage type - Option, - /// sessions - Vec, - /// messages - #[serde(default)] - Option, - /// complexity - #[serde(default)] - Option, - /// datapoints - #[serde(default)] - Option>, -); - -#[derive(Debug, Clone, PartialEq, Eq, Deserialize)] -pub struct LineSession( - /// session id - usize, - /// coverage - PyreportCoverage, - /// branches - #[serde(default)] - Option>, - /// partials - #[serde(default)] - Option>, - /// TODO: complexity - #[serde(default)] - Option, -); - -#[derive(Debug, Clone, PartialEq, Eq, Deserialize)] -pub struct CoverageDatapoint( - /// session id - u32, - /// coverage - PyreportCoverage, - /// coverage type - #[serde(default)] - Option, - /// labels - #[serde(default)] - Option>, -); - -impl From for types::CoverageDatapoint { - fn from(datapoint: CoverageDatapoint) -> Self { - Self { - session_id: datapoint.0, - _coverage: datapoint.1, - _coverage_type: datapoint.2, - labels: datapoint.3.unwrap_or_default(), - } - } -} - -impl<'s> TryFrom<&'s str> for CoverageType { - type Error = &'s str; - - fn try_from(value: &'s str) -> Result { - match value { - "line" => Ok(Self::Line), - "b" | "branch" => Ok(Self::Branch), - "m" | "method" => Ok(Self::Method), - s => Err(s), - } - } -} - -impl<'de> Deserialize<'de> for PyreportCoverage { - fn deserialize(deserializer: D) -> Result - where - D: de::Deserializer<'de>, - { - struct CoverageVisitor; - impl de::Visitor<'_> for CoverageVisitor { - type Value = PyreportCoverage; - - fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - formatter.write_str("a coverage value") - } - - fn visit_bool(self, v: bool) -> Result - where - E: de::Error, - { - if v { - Ok(PyreportCoverage::Partial()) - } else { - Err(de::Error::invalid_value(de::Unexpected::Bool(v), &self)) - } - } - - fn visit_u64(self, value: u64) -> Result - where - E: de::Error, - { - Ok(PyreportCoverage::HitCount(value as u32)) - } - - fn visit_str(self, v: &str) -> Result - where - E: de::Error, - { - let invalid = || de::Error::invalid_value(de::Unexpected::Str(v), &self); - let (covered, total) = 
v.split_once('/').ok_or_else(invalid)?; - - let covered: u32 = covered.parse().map_err(|_| invalid())?; - let total: u32 = total.parse().map_err(|_| invalid())?; - Ok(PyreportCoverage::BranchesTaken { covered, total }) - } - } - - deserializer.deserialize_any(CoverageVisitor) - } -} - -impl<'de> Deserialize<'de> for MissingBranch { - fn deserialize(deserializer: D) -> Result - where - D: de::Deserializer<'de>, - { - struct MissingBranchVisitor; - impl de::Visitor<'_> for MissingBranchVisitor { - type Value = MissingBranch; - - fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - formatter.write_str("a missing branch value") - } - - fn visit_str(self, v: &str) -> Result - where - E: de::Error, - { - let invalid = || de::Error::invalid_value(de::Unexpected::Str(v), &self); - - if let Some((block, branch)) = v.split_once(":") { - let block: u32 = block.parse().map_err(|_| invalid())?; - let branch: u32 = branch.parse().map_err(|_| invalid())?; - - return Ok(MissingBranch::BlockAndBranch(block, branch)); - } - - if let Some(condition) = v.strip_suffix(":jump") { - let condition: u32 = condition.parse().map_err(|_| invalid())?; - - // TODO(swatinem): can we skip saving the `jump` here? - return Ok(MissingBranch::Condition(condition, Some("jump".into()))); - } - - let line: u32 = v.parse().map_err(|_| invalid())?; - Ok(MissingBranch::Line(line)) - } - } - - deserializer.deserialize_any(MissingBranchVisitor) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_parsing_chunks() { - let simple_line_record = LineRecord( - PyreportCoverage::HitCount(1), - None, - vec![LineSession( - 0, - PyreportCoverage::HitCount(1), - None, - None, - None, - )], - None, - None, - None, - ); - - #[allow(clippy::type_complexity)] - let cases: &[( - &[u8], // input - &[&[Option]], // chunks: line records - )] = &[ - ( - // Header and one chunk with an empty line - b"{}\n<<<<< end_of_header >>>>>\n{}\n", - &[&[]], - ), - ( - // No header, one chunk with a populated line and an empty line - b"{}\n[1, null, [[0, 1]]]\n", - &[&[Some(simple_line_record.clone())]], - ), - ( - // No header, two chunks, the second having just one empty line - b"{}\n[1, null, [[0, 1]]]\n\n<<<<< end_of_chunk >>>>>\n{}\n", - &[&[Some(simple_line_record.clone())], &[]], - ), - ( - // Header, two chunks, the second having multiple data lines and an empty line - b"{}\n<<<<< end_of_header >>>>>\n{}\n[1, null, [[0, 1]]]\n\n<<<<< end_of_chunk >>>>>\n{}\n[1, null, [[0, 1]]]\n[1, null, [[0, 1]]]\n", - &[ - &[Some(simple_line_record.clone())], - &[ - Some(simple_line_record.clone()), - Some(simple_line_record.clone()), - ], - ], - ), - ]; - - for (input, expected_chunks) in cases { - let chunks_file = ChunksFile::new(input).unwrap(); - let mut chunks = chunks_file.chunks(); - - for expected_line_records in *expected_chunks { - let mut chunk = chunks.next_chunk().unwrap().unwrap(); - - let mut lines = vec![]; - while let Some(line) = chunk.next_line().unwrap() { - lines.push(line); - } - - assert_eq!(lines, *expected_line_records); - } - assert!(chunks.next_chunk().unwrap().is_none()); - } - } -} diff --git a/core/src/parsers/pyreport/mod.rs b/core/src/parsers/pyreport/mod.rs index 6d9ff6a..537cf29 100644 --- a/core/src/parsers/pyreport/mod.rs +++ b/core/src/parsers/pyreport/mod.rs @@ -1,15 +1,10 @@ use std::fs::File; use memmap2::Mmap; -use winnow::Parser; -use crate::{ - error::{CodecovError, Result}, - report::{SqliteReport, SqliteReportBuilder, SqliteReportBuilderTx}, -}; +use crate::{error::Result, 
report::SqliteReportBuilder}; pub mod chunks; -pub mod chunks_serde; pub mod report_json; mod utils; @@ -40,34 +35,16 @@ pub fn parse_pyreport( chunks_file: &File, report_builder: &mut SqliteReportBuilder, ) -> Result<()> { - // Encapsulate all of this in a block so that `report_builder_tx` gets torn down - // at the end. Otherwise, it'll hold onto a reference to `report_builder` - // and prevent us from consuming `report_builder` to actually build a - // `SqliteReport`. - { - let mut report_builder_tx = report_builder.transaction()?; + let mut report_builder_tx = report_builder.transaction()?; - // Memory-map the input file so we don't have to read the whole thing into RAM - let mmap_handle = unsafe { Mmap::map(report_json_file)? }; - let report_json::ParsedReportJson { files, sessions } = - report_json::parse_report_json(&mmap_handle, &mut report_builder_tx)?; + // Memory-map the input file so we don't have to read the whole thing into RAM + let report_json_file = unsafe { Mmap::map(report_json_file)? }; + let report_json = report_json::parse_report_json(&report_json_file, &mut report_builder_tx)?; - // Replace our mmap handle so the first one can be unmapped - let mmap_handle = unsafe { Mmap::map(chunks_file)? }; - let buf = unsafe { std::str::from_utf8_unchecked(&mmap_handle[..]) }; + // Replace our mmap handle so the first one can be unmapped + let chunks_file = unsafe { Mmap::map(chunks_file)? }; - // Move `report_builder` from the report JSON's parse context to this one - let chunks_ctx = chunks::ParseCtx::new(report_builder_tx, files, sessions); - let mut chunks_stream = - chunks::ReportOutputStream::<&str, SqliteReport, SqliteReportBuilderTx> { - input: buf, - state: chunks_ctx, - }; - chunks::parse_chunks_file - .parse_next(&mut chunks_stream) - .map_err(|e| e.into_inner().unwrap_or_default()) - .map_err(CodecovError::ParserError)?; - } + chunks::parse_chunks_file(&chunks_file, report_json, report_builder_tx)?; Ok(()) } diff --git a/core/tests/test_pyreport_shim.rs b/core/tests/test_pyreport_shim.rs index 5412b38..de0aad3 100644 --- a/core/tests/test_pyreport_shim.rs +++ b/core/tests/test_pyreport_shim.rs @@ -5,18 +5,13 @@ use codecov_rs::{ self, chunks, report_json::{self, ParsedReportJson}, }, - report::{ - models, pyreport::ToPyreport, Report, ReportBuilder, SqliteReport, SqliteReportBuilder, - }, + report::{models, pyreport::ToPyreport, Report, ReportBuilder, SqliteReportBuilder}, }; use serde_json::json; use tempfile::TempDir; use test_utils::fixtures::{ open_fixture, read_fixture, FixtureFormat::Pyreport, FixtureSize::Small, }; -use winnow::Parser; - -type ChunksStream<'a> = chunks::ReportOutputStream<&'a str, SqliteReport, SqliteReportBuilder>; struct Ctx { temp_dir: TempDir, @@ -92,7 +87,6 @@ fn test_parse_report_json() { #[test] fn test_parse_chunks_file() { let input = read_fixture(Pyreport, Small, "codecov-rs-chunks-d2a9ba1.txt").unwrap(); - let input = std::str::from_utf8(&input).unwrap(); let test_ctx = setup(); let mut report_builder = SqliteReportBuilder::open(test_ctx.db_file).unwrap(); @@ -118,21 +112,13 @@ fn test_parse_chunks_file() { .unwrap(); report_json_sessions.insert(0, session.id); - // Set up to call the chunks parser - let chunks_parse_ctx = chunks::ParseCtx::new( - report_builder, - report_json_files.clone(), - report_json_sessions.clone(), - ); - - let mut buf = ChunksStream { - input, - state: chunks_parse_ctx, + let report_json = ParsedReportJson { + files: report_json_files.clone(), + sessions: report_json_sessions, }; - chunks::parse_chunks_file 
- .parse_next(&mut buf) - .expect("Failed to parse"); + let report_builder_tx = report_builder.transaction().unwrap(); + chunks::parse_chunks_file(&input, report_json, report_builder_tx).expect("Failed to parse"); // Helper function for creating our expected values let mut coverage_sample_id_iterator = 0..; @@ -187,7 +173,7 @@ fn test_parse_chunks_file() { } } - let report = buf.state.db.report_builder.build().unwrap(); + let report = report_builder.build().unwrap(); let actual_coverage_samples = report .list_coverage_samples() .expect("Failed to list coverage samples"); From c89f46f793e19ec56289f4248bb61b9f1dcdd465 Mon Sep 17 00:00:00 2001 From: Arpad Borsos Date: Tue, 12 Nov 2024 14:32:18 +0100 Subject: [PATCH 7/9] remove all winnow-related code --- Cargo.lock | 10 - core/Cargo.toml | 1 - core/src/parsers/common.rs | 179 ------- core/src/parsers/json.rs | 593 ------------------------ core/src/parsers/mod.rs | 4 - core/src/parsers/pyreport/chunks.rs | 14 +- core/src/parsers/pyreport/utils.rs | 12 +- core/src/report/models.rs | 6 +- core/src/report/pyreport/chunks.rs | 3 +- core/src/report/pyreport/report_json.rs | 3 +- core/src/report/pyreport/types.rs | 3 +- core/src/report/sqlite/models.rs | 6 +- 12 files changed, 22 insertions(+), 812 deletions(-) delete mode 100644 core/src/parsers/common.rs delete mode 100644 core/src/parsers/json.rs diff --git a/Cargo.lock b/Cargo.lock index 38be938..4641b89 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -149,7 +149,6 @@ dependencies = [ "tempfile", "test_utils", "thiserror", - "winnow", ] [[package]] @@ -1123,15 +1122,6 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" -[[package]] -name = "winnow" -version = "0.5.40" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f593a95398737aeed53e489c785df13f3618e41dbcd6718c6addbf1395aa6876" -dependencies = [ - "memchr", -] - [[package]] name = "zerocopy" version = "0.7.35" diff --git a/core/Cargo.toml b/core/Cargo.toml index a422907..90d1a89 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -23,7 +23,6 @@ seahash = "4.1.0" serde = { version = "1.0.210", features = ["derive"] } serde_json = "1.0.128" thiserror = "1.0.64" -winnow = "0.5.34" [dev-dependencies] criterion = { version = "2.7.2", package = "codspeed-criterion-compat" } diff --git a/core/src/parsers/common.rs b/core/src/parsers/common.rs deleted file mode 100644 index 8a5ba3c..0000000 --- a/core/src/parsers/common.rs +++ /dev/null @@ -1,179 +0,0 @@ -use std::{fmt, fmt::Debug, marker::PhantomData}; - -use crate::report::{Report, ReportBuilder}; - -#[derive(PartialEq)] -pub struct ReportBuilderCtx> { - pub report_builder: B, - _phantom: PhantomData, -} - -impl> ReportBuilderCtx { - pub fn new(report_builder: B) -> ReportBuilderCtx { - ReportBuilderCtx { - report_builder, - _phantom: PhantomData, - } - } -} - -impl> Debug for ReportBuilderCtx { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("ReportBuilderCtx") - // .field("report_builder", &self.report_builder) - .finish() - } -} - -pub mod winnow { - use winnow::{ - ascii::float, - combinator::alt, - error::ParserError, - stream::{AsBStr, Compare, ParseSlice, Stream, StreamIsPartial}, - token::take_while, - PResult, Parser, - }; - - pub trait CharStream = Stream + StreamIsPartial; - pub trait StrStream = CharStream + for<'a> Compare<&'a str> + AsBStr - where - ::IterOffsets: Clone, - ::Slice: ParseSlice; - - /// Characters 
considered whitespace for the `ws` parser. - const WHITESPACE: &[char] = &[' ', '\t', '\n', '\r']; - - /// Parses a series of whitespace characters, returning the series as a - /// slice. - pub fn ws(buf: &mut S) -> PResult<::Slice> { - take_while(0.., WHITESPACE).parse_next(buf) - } - - /// Parses an unsigned decimal number with support for scientific notation. - /// Truncates floats, clamps numbers not in the `u32` range. - pub fn parse_u32(buf: &mut S) -> PResult { - float.map(move |x: f64| x as u32).parse_next(buf) - } - - /// Combinator that will match the passed-in parser or `null`. - /// - If the passed-in parser matches, return `Some(output)` - /// - Of `null` matches, return `None` - /// - Otherwise, backtrack or whatever - pub fn nullable( - parser: ParseNext, - ) -> impl Parser, Error> - where - ParseNext: Parser, - Error: ParserError, - Output: Clone, - { - alt((parser.map(Some), "null".value(None::))) - } - - #[cfg(test)] - mod tests { - use winnow::{ - ascii::{alpha1, dec_uint}, - error::{ContextError, ErrMode}, - }; - - use super::*; - - #[test] - fn test_ws() { - assert_eq!(ws.parse_peek(" \r\t\n"), Ok(("", " \r\t\n"))); - assert_eq!(ws.parse_peek(" asd"), Ok(("asd", " "))); - assert_eq!(ws.parse_peek("asd "), Ok(("asd ", ""))); - } - - #[test] - fn test_parse_u32() { - assert_eq!(parse_u32.parse_peek("30"), Ok(("", 30))); - assert_eq!(parse_u32.parse_peek("30 "), Ok((" ", 30))); - - // Floats are truncated, not rounded - assert_eq!(parse_u32.parse_peek("30.6 "), Ok((" ", 30))); - assert_eq!(parse_u32.parse_peek("30.1 "), Ok((" ", 30))); - - // Scientific notation - assert_eq!(parse_u32.parse_peek("1e+0"), Ok(("", 1))); - assert_eq!(parse_u32.parse_peek("5.2e+5"), Ok(("", 520000))); - assert_eq!(parse_u32.parse_peek("1.2345e+2"), Ok(("", 123))); - assert_eq!(parse_u32.parse_peek("2.7e-5"), Ok(("", 0))); - - // Numbers are clamped to `u32` range - assert_eq!(parse_u32.parse_peek("5000000000"), Ok(("", 4294967295))); - assert_eq!(parse_u32.parse_peek("2.7e+20"), Ok(("", 4294967295))); - assert_eq!(parse_u32.parse_peek("-1"), Ok(("", 0))); - assert_eq!(parse_u32.parse_peek("-100"), Ok(("", 0))); - assert_eq!(parse_u32.parse_peek("-4.2"), Ok(("", 0))); - assert_eq!(parse_u32.parse_peek("-4.2e-1"), Ok(("", 0))); - - // Malformed - assert_eq!( - parse_u32.parse_peek(" 30"), - Err(ErrMode::Backtrack(ContextError::new())) - ); - assert_eq!( - parse_u32.parse_peek("x30"), - Err(ErrMode::Backtrack(ContextError::new())) - ); - } - - #[test] - fn test_nullable() { - // with floats - assert_eq!( - nullable(float::<&str, f64, ContextError>).parse_peek("3.4"), - Ok(("", Some(3.4))) - ); - assert_eq!( - nullable(float::<&str, f64, ContextError>).parse_peek("null"), - Ok(("", None)) - ); - assert_eq!( - nullable(float::<&str, f64, ContextError>).parse_peek("malformed"), - Err(ErrMode::Backtrack(ContextError::new())), - ); - assert_eq!( - nullable(float::<&str, f64, ContextError>).parse_peek("nul"), - Err(ErrMode::Backtrack(ContextError::new())), - ); - - // with decimals - assert_eq!( - nullable(dec_uint::<&str, u64, ContextError>).parse_peek("3.4"), - Ok((".4", Some(3))) - ); - assert_eq!( - nullable(dec_uint::<&str, u64, ContextError>).parse_peek("null"), - Ok(("", None)) - ); - assert_eq!( - nullable(dec_uint::<&str, u64, ContextError>).parse_peek("malformed"), - Err(ErrMode::Backtrack(ContextError::new())), - ); - assert_eq!( - nullable(dec_uint::<&str, u64, ContextError>).parse_peek("nul"), - Err(ErrMode::Backtrack(ContextError::new())), - ); - - // with chars - assert_eq!( - 
nullable(alpha1::<&str, ContextError>).parse_peek("abcde"), - Ok(("", Some("abcde"))) - ); - // this is an edge case - `alpha1` has no problem matching `"null"` so we should - // let it - assert_eq!( - nullable(alpha1::<&str, ContextError>).parse_peek("null"), - Ok(("", Some("null"))) - ); - assert_eq!( - nullable(alpha1::<&str, ContextError>).parse_peek(".123."), - Err(ErrMode::Backtrack(ContextError::new())), - ); - } - } -} diff --git a/core/src/parsers/json.rs b/core/src/parsers/json.rs deleted file mode 100644 index f7ebf2b..0000000 --- a/core/src/parsers/json.rs +++ /dev/null @@ -1,593 +0,0 @@ -pub use serde_json::{ - value::{Map as JsonMap, Number as JsonNumber}, - Value as JsonVal, -}; -use winnow::{ - ascii::float, - combinator::{alt, delimited, opt, preceded, repeat, separated, separated_pair}, - error::{ContextError, ErrMode, ErrorKind, ParserError}, - stream::Stream, - token::none_of, - PResult, Parser, -}; - -use super::common::winnow::*; - -/* - * Parsers in this section return raw Rust types and may be useful to other - * parsers. - */ - -/// Parses the string "null", returning "null" as a slice. -pub fn parse_null(buf: &mut S) -> PResult<::Slice> { - "null".parse_next(buf) -} - -/// Parses the strings "true" and "false", returning the corresponding `bool`s. -pub fn parse_bool(buf: &mut S) -> PResult { - alt(("true".value(true), "false".value(false))).parse_next(buf) -} - -/// Parses numeric strings, returning the value as an f64. -/// Handles scientific notation. -pub fn parse_num(buf: &mut S) -> PResult { - float.verify_map(JsonNumber::from_f64).parse_next(buf) -} - -/// Parses a single character (which may be escaped), returning a `char`. -/// -/// ``` -/// # use codecov_rs::parsers::json::parse_char; -/// # use winnow::Parser; -/// assert_eq!(parse_char.parse_peek("a"), Ok(("", 'a'))); -/// assert_eq!(parse_char.parse_peek("\\n"), Ok(("", '\n'))); -/// ``` -/// -/// Consumes two characters if the first is a `\`. -pub fn parse_char(buf: &mut S) -> PResult { - let c = none_of('"').parse_next(buf); - match c { - Ok('\\') => { - let escaped = buf - .next_token() - .ok_or_else(|| ErrMode::from_error_kind(buf, ErrorKind::Token))?; - match escaped { - '"' | '\'' | '\\' => Ok(escaped), - 'n' => Ok('\n'), - 'r' => Ok('\r'), - 't' => Ok('\t'), - _ => panic!("Unrecognized escape: {}", escaped), - } - } - _ => c, - } -} - -/// Parses a series of characters between two `'"'` delimiters, returning a -/// `String`. -/// -/// Characters are parsed with `parse_char` and thus may be escaped. -pub fn parse_str(buf: &mut S) -> PResult { - delimited( - '"', - repeat(0.., parse_char).fold(String::new, |mut s, c| { - s.push(c); - s - }), - '"', - ) - .parse_next(buf) -} - -/* - * Parsers in this section return collections which may contain multiple - * types. They use the JsonVal enum to express that within Rust's type system - * and are thus json-specific. - */ - -/// Parses a series of json objects between `[]`s and separated by a comma, -/// returning a `Vec`. -pub fn parse_array(buf: &mut S) -> PResult> { - delimited(('[', ws), separated(0.., json_value, ','), (ws, ']')).parse_next(buf) -} - -/// Parses a key-value pair separated by a `:`, returning the key and value in a -/// tuple. -/// -/// The key is parsed with `parse_str` and the value is a `JsonVal`. 
-pub fn parse_kv(buf: &mut S) -> PResult<(String, JsonVal)> { - separated_pair(parse_str, (ws, ':', ws), json_value).parse_next(buf) -} - -/// Parses a series of key-value pairs separated by a ':' and surrounded by -/// `{}`s, returning a `Map`. -pub fn parse_object(buf: &mut S) -> PResult> { - // parse_kv.map(std::iter::once).map(serde_json::value::Map::from_iter). - // let start_map = parse_kv - // .map(std::iter::once) - // .map(serde_json::value::Map::from_iter); - let add_to_map = |mut m: JsonMap, (k, v)| { - m.insert(k, v); - m - }; - delimited( - ('{', ws), - repeat(0.., preceded(opt((ws, ',', ws)), parse_kv)).fold(JsonMap::new, add_to_map), - (ws, '}'), - ) - .parse_next(buf) -} - -/// Parses any json value, returning a `JsonVal`. -/// -/// Whitespace is stripped before/after valid json values. -pub fn json_value(buf: &mut S) -> PResult { - delimited( - ws, - alt(( - parse_null.value(JsonVal::Null), - parse_bool.map(JsonVal::Bool), - parse_num.map(JsonVal::Number), - parse_str.map(JsonVal::String), - parse_array.map(JsonVal::Array), - parse_object.map(JsonVal::Object), - )), - ws, - ) - .parse_next(buf) -} - -/// Parses the next key + `:` delimiter and asserts that the key matches the -/// passed-in value. To get the corresponding value, parse with something like: -/// -/// ``` -/// # use codecov_rs::parsers::json::{specific_key, json_value, JsonVal}; -/// # use winnow::combinator::preceded; -/// # use winnow::Parser; -/// let expected = Ok(("", JsonVal::Array(vec![]))); -/// let result = preceded(specific_key("files"), json_value).parse_peek("\"files\": []"); -/// assert_eq!(expected, result); -/// ``` -/// -/// Not used in generic json parsing but helpful when writing parsers for json -/// data that adheres to a schema. -pub fn specific_key(key: &str) -> impl Parser + '_ { - move |i: &mut S| { - delimited(ws, parse_str, (ws, ':', ws)) - .verify(move |s: &String| s == key) - .parse_next(i) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_parse_null() { - // test that an exact match succeeds - assert_eq!(parse_null.parse_peek("null"), Ok(("", "null"))); - - // test that trailing whitespace is not consumed / that trailing - // characters don't fail - assert_eq!(parse_null.parse_peek("null "), Ok((" ", "null"))); - - let malformed_test_cases = [ - " null", // test that whitespace is not stripped - "anull", // test that unexpected leading tokens fail - ]; - for test_case in &malformed_test_cases { - assert_eq!( - parse_null.parse_peek(*test_case), - Err(ErrMode::Backtrack(ContextError::new())), - ); - } - } - - #[test] - fn test_parse_bool() { - // test that exact matches succeed - assert_eq!(parse_bool.parse_peek("true"), Ok(("", true))); - assert_eq!(parse_bool.parse_peek("false"), Ok(("", false))); - - // test that trailing whitespace is not consumed / that trailing - // characters don't fail - assert_eq!(parse_bool.parse_peek("true "), Ok((" ", true))); - assert_eq!(parse_bool.parse_peek("false "), Ok((" ", false))); - - let malformed_test_cases = [" true", " false", "atrue", "afalse"]; - for test_case in &malformed_test_cases { - assert_eq!( - parse_bool.parse_peek(*test_case), - Err(ErrMode::Backtrack(ContextError::new())), - ); - } - } - - #[test] - fn test_parse_num() { - let json_num = |f| JsonNumber::from_f64(f).unwrap(); - // integers - assert_eq!(parse_num.parse_peek("34949"), Ok(("", json_num(34949.0)))); - assert_eq!(parse_num.parse_peek("-34949"), Ok(("", json_num(-34949.0)))); - - // decimals - assert_eq!( - parse_num.parse_peek("404.0101"), - 
Ok(("", json_num(404.0101))) - ); - assert_eq!( - parse_num.parse_peek("-404.0101"), - Ok(("", json_num(-404.0101))) - ); - assert_eq!(parse_num.parse_peek(".05"), Ok(("", json_num(0.05)))); - assert_eq!(parse_num.parse_peek("-.05"), Ok(("", json_num(-0.05)))); - - // scientific notation - assert_eq!(parse_num.parse_peek("3.3e5"), Ok(("", json_num(330000.0)))); - assert_eq!(parse_num.parse_peek("3.3e+5"), Ok(("", json_num(330000.0)))); - assert_eq!(parse_num.parse_peek("3.3e-5"), Ok(("", json_num(0.000033)))); - assert_eq!( - parse_num.parse_peek("-3.3e5"), - Ok(("", json_num(-330000.0))) - ); - assert_eq!( - parse_num.parse_peek("-3.3e+5"), - Ok(("", json_num(-330000.0))) - ); - assert_eq!( - parse_num.parse_peek("-3.3e-5"), - Ok(("", json_num(-0.000033))) - ); - assert_eq!(parse_num.parse_peek("3.3E5"), Ok(("", json_num(330000.0)))); - assert_eq!(parse_num.parse_peek("3.3E+5"), Ok(("", json_num(330000.0)))); - assert_eq!(parse_num.parse_peek("3.3E-5"), Ok(("", json_num(0.000033)))); - assert_eq!( - parse_num.parse_peek("-3.3E5"), - Ok(("", json_num(-330000.0))) - ); - assert_eq!( - parse_num.parse_peek("-3.3E+5"), - Ok(("", json_num(-330000.0))) - ); - assert_eq!( - parse_num.parse_peek("-3.3E-5"), - Ok(("", json_num(-0.000033))) - ); - - // trailing input - assert_eq!( - parse_num.parse_peek("3.abcde"), - Ok(("abcde", json_num(3.0))) - ); - assert_eq!(parse_num.parse_peek("3..."), Ok(("..", json_num(3.0)))); - assert_eq!( - parse_num.parse_peek("3.455.303"), - Ok((".303", json_num(3.455))) - ); - - let malformed_test_cases = [".", "aajad3.405"]; - for test_case in &malformed_test_cases { - assert_eq!( - parse_num.parse_peek(*test_case), - Err(ErrMode::Backtrack(ContextError::new())), - ); - } - } - - #[test] - fn test_parse_char() { - assert_eq!(parse_char.parse_peek("a"), Ok(("", 'a'))); - - // escaping - assert_eq!(parse_char.parse_peek("\\n"), Ok(("", '\n'))); - assert_eq!(parse_char.parse_peek("\\r"), Ok(("", '\r'))); - assert_eq!(parse_char.parse_peek("\\t"), Ok(("", '\t'))); - assert_eq!(parse_char.parse_peek("\\\""), Ok(("", '"'))); - assert_eq!(parse_char.parse_peek("\\\'"), Ok(("", '\''))); - assert_eq!(parse_char.parse_peek("\\\\"), Ok(("", '\\'))); - - // pre-escaped characters - assert_eq!(parse_char.parse_peek("\n"), Ok(("", '\n'))); - assert_eq!(parse_char.parse_peek("\r"), Ok(("", '\r'))); - assert_eq!(parse_char.parse_peek("\t"), Ok(("", '\t'))); - assert_eq!(parse_char.parse_peek("'"), Ok(("", '\''))); - - // trailing input - assert_eq!(parse_char.parse_peek("abcde"), Ok(("bcde", 'a'))); - assert_eq!(parse_char.parse_peek("\\nbcde"), Ok(("bcde", '\n'))); - - // can't lead with " - assert_eq!( - parse_char.parse_peek("\""), - Err(ErrMode::Backtrack(ContextError::new())) - ); - } - - #[test] - fn test_parse_str() { - // normal cases - assert_eq!(parse_str.parse_peek("\"\""), Ok(("", "".to_string()))); - assert_eq!( - parse_str.parse_peek("\"hello world\""), - Ok(("", "hello world".to_string())) - ); - assert_eq!( - parse_str.parse_peek("\"string with\nnewline\""), - Ok(("", "string with\nnewline".to_string())) - ); - assert_eq!( - parse_str.parse_peek("\"string with\\nnewline\""), - Ok(("", "string with\nnewline".to_string())) - ); - assert_eq!( - parse_str.parse_peek("\"str with backslash \\\\\""), - Ok(("", "str with backslash \\".to_string())) - ); - assert_eq!( - parse_str.parse_peek("\"str with escaped quote \\\" \""), - Ok(("", "str with escaped quote \" ".to_string())) - ); - - // trailing input - assert_eq!( - parse_str.parse_peek("\"hello world\", asdjasd"), - 
Ok((", asdjasd", "hello world".to_string())) - ); - - // malformed - let malformed_test_cases = [ - "no surrounding quotes", - "\"no final quote", - "no beginning quote\"", - "\"str ending on escaped quote\\\"", - ]; - for test_case in &malformed_test_cases { - assert_eq!( - parse_str.parse_peek(*test_case), - Err(ErrMode::Backtrack(ContextError::new())), - ); - } - } - - #[test] - fn test_parse_array() { - assert_eq!(parse_array.parse_peek("[]"), Ok(("", vec![]))); - assert_eq!( - parse_array.parse_peek("[3, null, true, false, \"str\", [], {}]"), - Ok(( - "", - vec![ - JsonVal::Number(JsonNumber::from_f64(3.0).unwrap()), - JsonVal::Null, - JsonVal::Bool(true), - JsonVal::Bool(false), - JsonVal::String("str".to_string()), - JsonVal::Array(vec![]), - JsonVal::Object(JsonMap::new()), - ] - )) - ); - - // same test case as above but with superfluous whitespace peppered around - assert_eq!( - parse_array - .parse_peek("[ 3 ,null , true , \n\t\tfalse, \t \"str\", [\n], {\r \t \n} ]"), - Ok(( - "", - vec![ - JsonVal::Number(JsonNumber::from_f64(3.0).unwrap()), - JsonVal::Null, - JsonVal::Bool(true), - JsonVal::Bool(false), - JsonVal::String("str".to_string()), - JsonVal::Array(vec![]), - JsonVal::Object(JsonMap::new()), - ] - )) - ); - - // trailing input - assert_eq!(parse_array.parse_peek("[]abcde"), Ok(("abcde", vec![]))); - assert_eq!(parse_array.parse_peek("[]]"), Ok(("]", vec![]))); - - // malformed - let malformed_test_cases = [ - "[4", - "[4,]", - "4[]", - "[4, null, unquoted string]", - "[4, null, {\"a\": 4]", - "[4, null, [\"str\", false, true]", - ]; - for test_case in &malformed_test_cases { - assert_eq!( - parse_array.parse_peek(*test_case), - Err(ErrMode::Backtrack(ContextError::new())), - ); - } - } - - #[test] - fn test_parse_kv() { - assert_eq!( - parse_kv.parse_peek("\"key\": null"), - Ok(("", ("key".to_string(), JsonVal::Null))) - ); - assert_eq!( - parse_kv.parse_peek("\"key\": true"), - Ok(("", ("key".to_string(), JsonVal::Bool(true)))) - ); - assert_eq!( - parse_kv.parse_peek("\"key\": false"), - Ok(("", ("key".to_string(), JsonVal::Bool(false)))) - ); - assert_eq!( - parse_kv.parse_peek("\"key\": 4.4"), - Ok(( - "", - ( - "key".to_string(), - JsonVal::Number(JsonNumber::from_f64(4.4).unwrap()) - ) - )), - ); - assert_eq!( - parse_kv.parse_peek("\"key\": \"str value\""), - Ok(( - "", - ("key".to_string(), JsonVal::String("str value".to_string())) - )) - ); - assert_eq!( - parse_kv.parse_peek("\"key\": []"), - Ok(("", ("key".to_string(), JsonVal::Array(vec![])))) - ); - assert_eq!( - parse_kv.parse_peek("\"key\": {}"), - Ok(("", ("key".to_string(), JsonVal::Object(JsonMap::new())))) - ); - - // empty string as a key is fine - assert_eq!( - parse_kv.parse_peek("\"\": null"), - Ok(("", ("".to_string(), JsonVal::Null))) - ); - - // pepper superfluous whitespace around - assert_eq!( - parse_kv.parse_peek("\"key\"\n\t :\n \t null"), - Ok(("", ("key".to_string(), JsonVal::Null))) - ); - - // trailing input - assert_eq!( - parse_kv.parse_peek("\"key\": null, \"key2\": null"), - Ok((", \"key2\": null", ("key".to_string(), JsonVal::Null))) - ); - assert_eq!( - parse_kv.parse_peek("\"key\": null}"), - Ok(("}", ("key".to_string(), JsonVal::Null))) - ); - assert_eq!( - parse_kv.parse_peek("\"key\": null]"), - Ok(("]", ("key".to_string(), JsonVal::Null))) - ); - assert_eq!( - parse_kv.parse_peek("\"key\": nulla"), - Ok(("a", ("key".to_string(), JsonVal::Null))) - ); - - // malformed - let malformed_test_cases = [ - "key: null", - "\"key: null", - "\"key\": ", - "key\": null", - "\"key\"; 
null", - "key: null", - ]; - for test_case in &malformed_test_cases { - assert_eq!( - parse_kv.parse_peek(*test_case), - Err(ErrMode::Backtrack(ContextError::new())), - ); - } - } - - #[test] - fn test_parse_object() { - assert_eq!(parse_object.parse_peek("{}"), Ok(("", JsonMap::new()))); - assert_eq!( - parse_object.parse_peek("{\"key\": null}"), - Ok(("", JsonMap::from_iter([("key".to_string(), JsonVal::Null)]))) - ); - assert_eq!( - parse_object.parse_peek("{\"key\": null, \"key2\": null}"), - Ok(( - "", - JsonMap::from_iter([ - ("key".to_string(), JsonVal::Null), - ("key2".to_string(), JsonVal::Null) - ]) - )) - ); - assert_eq!( - parse_object.parse_peek("{ \"key\" \n \t:\t\n null\n}"), - Ok(("", JsonMap::from_iter([("key".to_string(), JsonVal::Null)]))) - ); - - // trailing input - assert_eq!( - parse_object.parse_peek("{}abcde"), - Ok(("abcde", JsonMap::new())) - ); - assert_eq!(parse_object.parse_peek("{}}"), Ok(("}", JsonMap::new()))); - assert_eq!(parse_object.parse_peek("{}]"), Ok(("]", JsonMap::new()))); - - // malformed - let malformed_test_cases = [ - "{\"key\": null,}", - "{\"key\": null", - "\"key\": null", - "key: null", - "{\"key\": }", - "{\"key\": , }", - "abcde {\"key\": null}", - ]; - for test_case in &malformed_test_cases { - assert_eq!( - parse_object.parse_peek(*test_case), - Err(ErrMode::Backtrack(ContextError::new())), - ); - } - } - - #[test] - fn test_json_value() { - let test_cases = [ - "null", - "true", - "false", - "3.404", - "\"test string\"", - "[]", - "{}", - " \n\r\tnull\n ", - "\n\r true\r ", - " \n false\n ", - "\n 3.404\t ", - "\n \"test string\"\n ", - "\r\r\n\t []\t \t\r ", - " \r {}\r\r\n", - "[null, true, false, 3.4, \"str\", [], {}]", - "{\"null\": null, \"true\": true, \"false\": false, \"num\": 3.4, \"str\": \"str\", \"array\": [null, 3.3], \"object\": {\"k\": 4.4}}", - ]; - - for test_case in &test_cases { - let expected = serde_json::from_str(test_case).unwrap(); - assert_eq!(json_value.parse_peek(*test_case), Ok(("", expected))); - } - } - - #[test] - fn test_specific_key() { - assert_eq!( - specific_key("files").parse_peek("\"files\": {\"src/report.rs"), - Ok(("{\"src/report.rs", "files".to_string())) - ); - - // malformed - let malformed_test_cases = [ - "files\": {\"src", - "\"files: {\"src", - "leading\"files\": {\"src", - ]; - for test_case in &malformed_test_cases { - assert_eq!( - specific_key("files").parse_peek(*test_case), - Err(ErrMode::Backtrack(ContextError::new())) - ); - } - } -} diff --git a/core/src/parsers/mod.rs b/core/src/parsers/mod.rs index 2987e90..a40d1bf 100644 --- a/core/src/parsers/mod.rs +++ b/core/src/parsers/mod.rs @@ -1,6 +1,2 @@ -pub mod json; - #[cfg(feature = "pyreport")] pub mod pyreport; - -pub mod common; diff --git a/core/src/parsers/pyreport/chunks.rs b/core/src/parsers/pyreport/chunks.rs index d4a8562..8edd8d3 100644 --- a/core/src/parsers/pyreport/chunks.rs +++ b/core/src/parsers/pyreport/chunks.rs @@ -34,7 +34,7 @@ //! `report_line_or_empty` parser which wraps this and supports empty lines //! returns `Ok(())`. 
-use std::{collections::HashMap, fmt, mem, sync::OnceLock};
+use std::{collections::HashMap, fmt, marker::PhantomData, mem, sync::OnceLock};
 
 use memchr::{memchr, memmem};
 use serde::{de, de::IgnoredAny, Deserialize};
@@ -42,7 +42,6 @@ use serde::{de, de::IgnoredAny, Deserialize};
 use super::{report_json::ParsedReportJson, utils};
 use crate::{
     error::CodecovError,
-    parsers::common::ReportBuilderCtx,
     report::{
         pyreport::{
             types::{self, CoverageType, MissingBranch, Partial, PyreportCoverage, ReportLine},
@@ -67,7 +66,9 @@ pub struct ChunkCtx {
 pub struct ParseCtx<R: Report, B: ReportBuilder<R>> {
     /// Rather than returning parsed results, we write them to this
     /// `report_builder`.
-    pub db: ReportBuilderCtx<R, B>,
+    pub report_builder: B,
+    // FIXME: Rust, you are drunk. We need `R`.
+    _phantom: PhantomData<R>,
 
     /// Tracks the labels that we've already added to the report. The key is the
     /// identifier for the label inside the chunks file and the value is the
@@ -100,7 +101,8 @@ impl<R: Report, B: ReportBuilder<R>> ParseCtx<R, B> {
     ) -> ParseCtx<R, B> {
         ParseCtx {
             labels_index: HashMap::new(),
-            db: ReportBuilderCtx::new(report_builder),
+            report_builder,
+            _phantom: PhantomData,
             chunk: ChunkCtx {
                 index: 0,
                 current_line: 0,
@@ -114,7 +116,7 @@ impl<R: Report, B: ReportBuilder<R>> ParseCtx<R, B> {
 impl<R: Report, B: ReportBuilder<R>> fmt::Debug for ParseCtx<R, B> {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         f.debug_struct("ParseCtx")
-            .field("db", &self.db)
+            .field("report_builder", &format_args!("..."))
             .field("labels_index", &self.labels_index)
             .field("chunk", &self.chunk)
             .finish()
@@ -173,7 +175,7 @@ where
             for datapoint in datapoints.values() {
                 for label in &datapoint.labels {
                     if !ctx.labels_index.contains_key(label) {
-                        let context = ctx.db.report_builder.insert_context(label)?;
+                        let context = ctx.report_builder.insert_context(label)?;
                         ctx.labels_index.insert(label.into(), context.id);
                     }
                 }
diff --git a/core/src/parsers/pyreport/utils.rs b/core/src/parsers/pyreport/utils.rs
index fb6e4bd..562c684 100644
--- a/core/src/parsers/pyreport/utils.rs
+++ b/core/src/parsers/pyreport/utils.rs
@@ -231,7 +231,7 @@ pub fn save_report_lines<R: Report, B: ReportBuilder<R>>(
     // assigned as a side-effect of this insertion. That lets us populate the
     // `local_sample_id` foreign key on all of the models associated with each
     // `CoverageSample`.
-    ctx.db.report_builder.multi_insert_coverage_sample(
+    ctx.report_builder.multi_insert_coverage_sample(
         models
             .iter_mut()
             .map(|LineSessionModels { sample, .. }| sample)
@@ -240,7 +240,7 @@
 
     // Populate `local_sample_id` and insert all of the context assocs for each
     // `LineSession` (if there are any)
-    ctx.db.report_builder.multi_associate_context(
+    ctx.report_builder.multi_associate_context(
         models
             .iter_mut()
             .flat_map(|LineSessionModels { sample, assocs, .. }| {
@@ -254,7 +254,7 @@
 
     // Populate `local_sample_id` and insert all of the `BranchesData` records for
     // each `LineSession` (if there are any)
-    ctx.db.report_builder.multi_insert_branches_data(
+    ctx.report_builder.multi_insert_branches_data(
         models
             .iter_mut()
             .flat_map(
@@ -272,7 +272,7 @@
 
     // Populate `local_sample_id` and insert the single `MethodData` record for each
     // `LineSession` (if there is one)
-    ctx.db.report_builder.multi_insert_method_data(
+    ctx.report_builder.multi_insert_method_data(
         models
             .iter_mut()
             .filter_map(|LineSessionModels { sample, method, .. }| {
@@ -289,7 +289,7 @@
     // Populate `local_sample_id` and insert all of the `SpanData` records for each
     // `LineSession` (if there are any). In a chunks file, only spans that are
    // subsets of a single line are recorded.
-    ctx.db.report_builder.multi_insert_span_data(
+    ctx.report_builder.multi_insert_span_data(
         models
             .iter_mut()
             .flat_map(
@@ -1190,7 +1190,7 @@ mod tests {
 
         // Now we actually run the function
         save_report_lines(&report_lines, &mut test_ctx.parse_ctx).unwrap();
-        let report = test_ctx.parse_ctx.db.report_builder.build().unwrap();
+        let report = test_ctx.parse_ctx.report_builder.build().unwrap();
 
         // Now we need to set up our mock expectations. There are a lot of them.
         // First thing that gets inserted is CoverageSample. We expect 4 of them,
diff --git a/core/src/report/models.rs b/core/src/report/models.rs
index 41ca5d0..e94bc45 100644
--- a/core/src/report/models.rs
+++ b/core/src/report/models.rs
@@ -97,8 +97,6 @@
 use serde::Deserialize;
 
-use crate::parsers::json::JsonVal;
-
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize)]
 #[serde(try_from = "&str")]
 pub enum CoverageType {
@@ -378,7 +376,7 @@ pub struct RawUpload {
     ///
     /// Ex: `["unit"]`
     /// Ex: `["integration", "windows"]`
-    pub flags: Option<JsonVal>,
+    pub flags: Option<serde_json::Value>,
 
     /// Key in the report JSON: `"c"`
     pub provider: Option<String>,
@@ -429,7 +427,7 @@ pub struct RawUpload {
     ///
     /// Ex: `{"carriedforward_from":
     /// "bcec3478e2a27bb7950f40388cf191834fb2d5a3"}`
-    pub session_extras: Option<JsonVal>,
+    pub session_extras: Option<serde_json::Value>,
 }
 
 /// Aggregated coverage metrics for lines, branches, and sessions in a report
diff --git a/core/src/report/pyreport/chunks.rs b/core/src/report/pyreport/chunks.rs
index 1d45a1e..8a75de1 100644
--- a/core/src/report/pyreport/chunks.rs
+++ b/core/src/report/pyreport/chunks.rs
@@ -1,11 +1,10 @@
 use std::io::Write;
 
-use serde_json::json;
+use serde_json::{json, Number as JsonNumber, Value as JsonVal};
 
 use super::{CHUNKS_FILE_END_OF_CHUNK, CHUNKS_FILE_HEADER_TERMINATOR};
 use crate::{
     error::{CodecovError, Result},
-    parsers::json::{JsonNumber, JsonVal},
     report::{models, sqlite::json_value_from_sql, SqliteReport},
 };
 
diff --git a/core/src/report/pyreport/report_json.rs b/core/src/report/pyreport/report_json.rs
index 6fd882c..d83fcce 100644
--- a/core/src/report/pyreport/report_json.rs
+++ b/core/src/report/pyreport/report_json.rs
@@ -1,10 +1,9 @@
 use std::io::Write;
 
-use serde_json::json;
+use serde_json::{json, Value as JsonVal};
 
 use crate::{
     error::Result,
-    parsers::json::JsonVal,
     report::{models, sqlite::json_value_from_sql, SqliteReport},
 };
 
diff --git a/core/src/report/pyreport/types.rs b/core/src/report/pyreport/types.rs
index 8402b80..c0aa1c1 100644
--- a/core/src/report/pyreport/types.rs
+++ b/core/src/report/pyreport/types.rs
@@ -3,7 +3,6 @@ use std::collections::HashMap;
 use serde::Deserialize;
 
 pub use super::super::models::CoverageType;
-use crate::parsers::json::JsonVal;
 #[cfg(doc)]
 use crate::report::models;
 
@@ -176,7 +175,7 @@ pub struct ReportLine {
     pub sessions: Vec<LineSession>,
 
     /// Long forgotten field that takes up space.
-    pub _messages: Option<Vec<JsonVal>>,
+    pub _messages: Option<Vec<serde_json::Value>>,
 
     /// An aggregated complexity metric across all of the [`LineSession`]s in
     /// `sessions`.
diff --git a/core/src/report/sqlite/models.rs b/core/src/report/sqlite/models.rs
index 3af0c25..bf605b6 100644
--- a/core/src/report/sqlite/models.rs
+++ b/core/src/report/sqlite/models.rs
@@ -13,7 +13,7 @@
 use rusqlite::types::{FromSql, FromSqlResult, ToSql, ToSqlOutput, ValueRef};
 
 use super::super::models::*;
-use crate::{error::Result, parsers::json::JsonVal};
+use crate::error::Result;
 
 /// Takes care of the boilerplate to insert a model into the database.
/// Implementers must provide three things: @@ -142,8 +142,8 @@ pub trait Insertable { /// Can't implement foreign traits (`ToSql`/`FromSql`) on foreign types /// (`serde_json::Value`) so this helper function fills in. -pub fn json_value_from_sql(s: String, col: usize) -> rusqlite::Result { - serde_json::from_str(s.as_str()).map_err(|e| { +pub fn json_value_from_sql(s: String, col: usize) -> rusqlite::Result { + serde_json::from_str(&s).map_err(|e| { rusqlite::Error::FromSqlConversionFailure(col, rusqlite::types::Type::Text, Box::new(e)) }) } From 7e40eed8d3d83f2b8df95786767b2104ff9fbde0 Mon Sep 17 00:00:00 2001 From: Arpad Borsos Date: Tue, 12 Nov 2024 14:43:32 +0100 Subject: [PATCH 8/9] switch to stable rust and reformat imports --- .github/workflows/bench.yml | 3 ++- .github/workflows/ci.yml | 14 ++++++------ README.md | 9 +++++--- bindings/src/error.rs | 3 ++- bindings/src/lib.rs | 3 ++- core/benches/pyreport.rs | 10 ++++----- core/examples/parse_pyreport.rs | 8 +++++-- core/examples/sql_to_pyreport.rs | 10 ++++----- core/src/error.rs | 4 ---- core/src/lib.rs | 2 -- core/src/parsers/pyreport/chunks.rs | 27 ++++++++++++------------ core/src/parsers/pyreport/mod.rs | 3 ++- core/src/parsers/pyreport/report_json.rs | 9 ++++---- core/src/parsers/pyreport/utils.rs | 15 +++++-------- core/src/report/pyreport/chunks.rs | 7 +++--- core/src/report/pyreport/mod.rs | 6 ++---- core/src/report/pyreport/report_json.rs | 7 +++--- core/src/report/sqlite/mod.rs | 3 ++- core/src/report/sqlite/models.rs | 10 +++------ core/src/report/sqlite/report.rs | 12 +++++------ core/src/report/sqlite/report_builder.rs | 15 ++++++------- core/src/test_utils/sqlite_report.rs | 7 +++--- core/src/test_utils/test_report.rs | 14 +++++------- core/tests/test_pyreport_shim.rs | 24 ++++++++++----------- rust-toolchain.toml | 2 -- rustfmt.toml | 3 --- test_utils/src/fixtures.rs | 10 ++++----- 27 files changed, 109 insertions(+), 131 deletions(-) delete mode 100644 rust-toolchain.toml delete mode 100644 rustfmt.toml diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml index 4d4cc10..b096b32 100644 --- a/.github/workflows/bench.yml +++ b/.github/workflows/bench.yml @@ -16,7 +16,8 @@ jobs: with: lfs: true - - run: rustup toolchain install nightly --profile minimal --no-self-update + - run: rustup toolchain install stable --profile minimal --no-self-update + - uses: Swatinem/rust-cache@v2 - uses: cargo-bins/cargo-binstall@main - run: cargo binstall cargo-codspeed diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3d2e365..6290912 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -20,7 +20,8 @@ jobs: steps: - uses: actions/checkout@v4 - - run: rustup toolchain install nightly --profile minimal --component rustfmt --component clippy --no-self-update + - run: rustup toolchain install stable --profile minimal --component rustfmt --component clippy --no-self-update + - uses: Swatinem/rust-cache@v2 - run: cargo fmt --all -- --check - run: cargo clippy --all-features --workspace --tests --examples -- -D clippy::all @@ -46,7 +47,8 @@ jobs: steps: - uses: actions/checkout@v4 - - run: rustup toolchain install nightly --profile minimal --no-self-update + - run: rustup toolchain install stable --profile minimal --no-self-update + - uses: Swatinem/rust-cache@v2 - run: cargo test --workspace --all-features --doc - run: cargo doc --workspace --all-features --document-private-items --no-deps @@ -57,14 +59,12 @@ jobs: steps: - uses: actions/checkout@v4 - - run: rustup toolchain install 
From 7e40eed8d3d83f2b8df95786767b2104ff9fbde0 Mon Sep 17 00:00:00 2001
From: Arpad Borsos
Date: Tue, 12 Nov 2024 14:43:32 +0100
Subject: [PATCH 8/9] switch to stable rust and reformat imports

---
 .github/workflows/bench.yml              |  3 ++-
 .github/workflows/ci.yml                 | 14 ++++++------
 README.md                                |  9 +++++---
 bindings/src/error.rs                    |  3 ++-
 bindings/src/lib.rs                      |  3 ++-
 core/benches/pyreport.rs                 | 10 ++++-----
 core/examples/parse_pyreport.rs          |  8 +++++--
 core/examples/sql_to_pyreport.rs         | 10 ++++-----
 core/src/error.rs                        |  4 ----
 core/src/lib.rs                          |  2 --
 core/src/parsers/pyreport/chunks.rs      | 27 ++++++++++++------------
 core/src/parsers/pyreport/mod.rs         |  3 ++-
 core/src/parsers/pyreport/report_json.rs |  9 ++++----
 core/src/parsers/pyreport/utils.rs       | 15 +++++--------
 core/src/report/pyreport/chunks.rs       |  7 +++---
 core/src/report/pyreport/mod.rs          |  6 ++----
 core/src/report/pyreport/report_json.rs  |  7 +++---
 core/src/report/sqlite/mod.rs            |  3 ++-
 core/src/report/sqlite/models.rs         | 10 +++------
 core/src/report/sqlite/report.rs         | 12 +++++------
 core/src/report/sqlite/report_builder.rs | 15 ++++++-------
 core/src/test_utils/sqlite_report.rs     |  7 +++---
 core/src/test_utils/test_report.rs       | 14 +++++-------
 core/tests/test_pyreport_shim.rs         | 24 ++++++++++-----------
 rust-toolchain.toml                      |  2 --
 rustfmt.toml                             |  3 ---
 test_utils/src/fixtures.rs               | 10 ++++-----
 27 files changed, 109 insertions(+), 131 deletions(-)
 delete mode 100644 rust-toolchain.toml
 delete mode 100644 rustfmt.toml

diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml
index 4d4cc10..b096b32 100644
--- a/.github/workflows/bench.yml
+++ b/.github/workflows/bench.yml
@@ -16,7 +16,8 @@ jobs:
         with:
           lfs: true

-      - run: rustup toolchain install nightly --profile minimal --no-self-update
+      - run: rustup toolchain install stable --profile minimal --no-self-update
+      - uses: Swatinem/rust-cache@v2

       - uses: cargo-bins/cargo-binstall@main
       - run: cargo binstall cargo-codspeed
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 3d2e365..6290912 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -20,7 +20,8 @@ jobs:
     steps:
      - uses: actions/checkout@v4

-      - run: rustup toolchain install nightly --profile minimal --component rustfmt --component clippy --no-self-update
+      - run: rustup toolchain install stable --profile minimal --component rustfmt --component clippy --no-self-update
+      - uses: Swatinem/rust-cache@v2

      - run: cargo fmt --all -- --check
      - run: cargo clippy --all-features --workspace --tests --examples -- -D clippy::all
@@ -46,7 +47,8 @@
     steps:
      - uses: actions/checkout@v4

-      - run: rustup toolchain install nightly --profile minimal --no-self-update
+      - run: rustup toolchain install stable --profile minimal --no-self-update
+      - uses: Swatinem/rust-cache@v2

      - run: cargo test --workspace --all-features --doc
      - run: cargo doc --workspace --all-features --document-private-items --no-deps
@@ -57,14 +59,12 @@
     steps:
      - uses: actions/checkout@v4

-      - run: rustup toolchain install nightly --profile minimal --no-self-update
+      - run: rustup toolchain install stable --profile minimal --no-self-update
+      - uses: Swatinem/rust-cache@v2

      - uses: taiki-e/install-action@cargo-llvm-cov
      - uses: taiki-e/install-action@nextest

-      # FIXME(swatinem): We should pass `--all-targets` to also compile and tests benchmarks
-      # Though currently `divan` does not support all CLI arguments as used by `nextest`,
-      # and benchmarks are unbearably slow anyway, so its not feasible to run in debug builds.
-      - run: cargo llvm-cov nextest --lcov --output-path core.lcov --workspace --all-features
+      - run: cargo llvm-cov nextest --lcov --output-path core.lcov --workspace --all-features --all-targets

      - run: mv target/nextest/default/core-test-results.xml .

      - uses: actions/setup-python@v5
diff --git a/README.md b/README.md
index 7d0dba2..acad8dc 100644
--- a/README.md
+++ b/README.md
@@ -17,17 +17,19 @@ All details (e.g. SQLite schema, code interfaces) subject to breaking changes un
 ## Developing

 Set up your development environment:
-- Install the nightly compiler via [rustup](https://rustup.rs/). At time of writing, `codecov-rs` requires the nightly compiler for niceties such as `#[feature(trait_alias)]`.
+
 - To work on the Python bindings, run `source .envrc` (or use `direnv`) to set up a virtual environment. Update development dependencies with `pip install -r python/requirements.dev.txt`
 - Install lint hooks with `pip install pre-commit && pre-commit install`.
 - Large sample test reports are checked in using [Git LFS](https://git-lfs.com/) in `test_utils/fixtures/**/large` directories (e.g. `test_utils/fixtures/pyreport/large`). Tests and benchmarks may reference them so installing it yourself is recommended.

 `codecov-rs` aims to serve as effective documentation for every flavor of every format it supports. To that end, the following are greatly appreciated in submissions:
+
 - Thorough doc comments (`///` / `/**`). For parsers, include snippets that show what inputs look like
 - Granular, in-module unit tests
 - Integration tests with real-world samples (that are safe to distribute; don't send us data from your private repo)

 The `core/examples/` directory contains runnable commands for developers including:
+
 - `parse_pyreport`: converts a given pyreport into a SQLite report
 - `sql_to_pyreport`: converts a given SQLite report into a pyreport (report JSON + chunks file)
@@ -53,9 +55,8 @@ New parsers should be optional via Cargo features. Adding them to the default fe

 Where possible, parsers should not load their entire input or output into RAM. On the input side, you can avoid that with a _streaming_ parser or by using `memmap2` to map the input file into virtual memory. SQLite makes it straightforward enough to stream outputs to the database. Coverage formats really run the gamut so there's no one-size-fits-all framework we can use.
 Some options:
+
 - [`quick_xml`](https://crates.io/crates/quick_xml), a streaming XML parser
-- [`winnow`](https://crates.io/crates/winnow), a parser combinator framework (fork of [`nom`](https://crates.io/crates/nom))
-  - `winnow`'s docs illustrate [how one can write a streaming parser](https://docs.rs/winnow/latest/winnow/_topic/partial/index.html)
 - [`serde`](https://serde.rs/), a popular serialization/deserialization framework
   - `serde`'s docs illustrate [how one can write a streaming parser](https://serde.rs/stream-array.html)
@@ -64,6 +65,7 @@ Non-XML formats lack clean OOTB support for streaming so `codecov-rs` currently
 ### Testing

 Run tests with:
+
 ```
 # Rust tests
 $ cargo test
@@ -75,6 +77,7 @@ $ pytest

 ### Benchmarks

 Run benchmarks with:
+
 ```
 $ cargo bench --features testing
 ```
diff --git a/bindings/src/error.rs b/bindings/src/error.rs
index 2c9532f..123d9fa 100644
--- a/bindings/src/error.rs
+++ b/bindings/src/error.rs
@@ -1,5 +1,6 @@
 pub use codecov_rs::error::CodecovError as RsCodecovError;
-use pyo3::{exceptions::PyRuntimeError, prelude::*};
+use pyo3::exceptions::PyRuntimeError;
+use pyo3::prelude::*;

 pub struct PyCodecovError(RsCodecovError);
diff --git a/bindings/src/lib.rs b/bindings/src/lib.rs
index 6c8e4e4..cef1cb2 100644
--- a/bindings/src/lib.rs
+++ b/bindings/src/lib.rs
@@ -1,4 +1,5 @@
-use std::{fs::File, path::PathBuf};
+use std::fs::File;
+use std::path::PathBuf;

 use codecov_rs::{parsers, report};
 use pyo3::prelude::*;
diff --git a/core/benches/pyreport.rs b/core/benches/pyreport.rs
index 8ef9849..25355db 100644
--- a/core/benches/pyreport.rs
+++ b/core/benches/pyreport.rs
@@ -1,11 +1,11 @@
 use std::collections::HashMap;

-use codecov_rs::{
-    parsers::pyreport::{chunks, report_json},
-    test_utils::test_report::TestReportBuilder,
-};
+use codecov_rs::parsers::pyreport::{chunks, report_json};
+use codecov_rs::test_utils::test_report::TestReportBuilder;
 use criterion::{criterion_group, criterion_main, Criterion};
-use test_utils::fixtures::{read_fixture, FixtureFormat::Pyreport, FixtureSize::Large};
+use test_utils::fixtures::read_fixture;
+use test_utils::fixtures::FixtureFormat::Pyreport;
+use test_utils::fixtures::FixtureSize::Large;

 criterion_group!(
     benches,
diff --git a/core/examples/parse_pyreport.rs b/core/examples/parse_pyreport.rs
index 42faac0..ba144be 100644
--- a/core/examples/parse_pyreport.rs
+++ b/core/examples/parse_pyreport.rs
@@ -1,6 +1,10 @@
-use std::{env, fs::File, path::PathBuf};
+use std::env;
+use std::fs::File;
+use std::path::PathBuf;

-use codecov_rs::{error::Result, parsers::pyreport::parse_pyreport, report::SqliteReportBuilder};
+use codecov_rs::error::Result;
+use codecov_rs::parsers::pyreport::parse_pyreport;
+use codecov_rs::report::SqliteReportBuilder;

 fn usage_error() -> ! {
     println!("Usage:");
diff --git a/core/examples/sql_to_pyreport.rs b/core/examples/sql_to_pyreport.rs
index 3f57c10..2e4a041 100644
--- a/core/examples/sql_to_pyreport.rs
+++ b/core/examples/sql_to_pyreport.rs
@@ -1,9 +1,9 @@
-use std::{env, fs::File};
+use std::env;
+use std::fs::File;

-use codecov_rs::{
-    error::Result,
-    report::{pyreport::ToPyreport, SqliteReport},
-};
+use codecov_rs::error::Result;
+use codecov_rs::report::pyreport::ToPyreport;
+use codecov_rs::report::SqliteReport;

 fn usage_error() -> ! {
     println!("Usage:");
diff --git a/core/src/error.rs b/core/src/error.rs
index e184141..3cb9697 100644
--- a/core/src/error.rs
+++ b/core/src/error.rs
@@ -15,10 +15,6 @@ pub enum CodecovError {
     #[error("report builder error: '{0}'")]
     ReportBuilderError(String),

-    // Can't use #[from]
-    #[error("parser error: '{0}'")]
-    ParserError(winnow::error::ContextError),
-
     #[error("parser error: '{0}'")]
     Json(#[from] serde_json::Error),
diff --git a/core/src/lib.rs b/core/src/lib.rs
index e267ea2..90b5e1f 100644
--- a/core/src/lib.rs
+++ b/core/src/lib.rs
@@ -1,5 +1,3 @@
-#![feature(trait_alias)]
-
 pub mod report;

 pub mod parsers;
diff --git a/core/src/parsers/pyreport/chunks.rs b/core/src/parsers/pyreport/chunks.rs
index 8edd8d3..ddbfb2c 100644
--- a/core/src/parsers/pyreport/chunks.rs
+++ b/core/src/parsers/pyreport/chunks.rs
@@ -34,22 +34,23 @@
 //! `report_line_or_empty` parser which wraps this and supports empty lines
 //! returns `Ok(())`.

-use std::{collections::HashMap, fmt, marker::PhantomData, mem, sync::OnceLock};
+use std::collections::HashMap;
+use std::marker::PhantomData;
+use std::sync::OnceLock;
+use std::{fmt, mem};

 use memchr::{memchr, memmem};
-use serde::{de, de::IgnoredAny, Deserialize};
-
-use super::{report_json::ParsedReportJson, utils};
-use crate::{
-    error::CodecovError,
-    report::{
-        pyreport::{
-            types::{self, CoverageType, MissingBranch, Partial, PyreportCoverage, ReportLine},
-            CHUNKS_FILE_END_OF_CHUNK, CHUNKS_FILE_HEADER_TERMINATOR,
-        },
-        Report, ReportBuilder,
-    },
+use serde::de::IgnoredAny;
+use serde::{de, Deserialize};
+
+use super::report_json::ParsedReportJson;
+use super::utils;
+use crate::error::CodecovError;
+use crate::report::pyreport::types::{
+    self, CoverageType, MissingBranch, Partial, PyreportCoverage, ReportLine,
 };
+use crate::report::pyreport::{CHUNKS_FILE_END_OF_CHUNK, CHUNKS_FILE_HEADER_TERMINATOR};
+use crate::report::{Report, ReportBuilder};

 #[derive(PartialEq, Debug)]
 pub struct ChunkCtx {
diff --git a/core/src/parsers/pyreport/mod.rs b/core/src/parsers/pyreport/mod.rs
index 537cf29..baee7f0 100644
--- a/core/src/parsers/pyreport/mod.rs
+++ b/core/src/parsers/pyreport/mod.rs
@@ -2,7 +2,8 @@ use std::fs::File;

 use memmap2::Mmap;

-use crate::{error::Result, report::SqliteReportBuilder};
+use crate::error::Result;
+use crate::report::SqliteReportBuilder;

 pub mod chunks;
 pub mod report_json;
diff --git a/core/src/parsers/pyreport/report_json.rs b/core/src/parsers/pyreport/report_json.rs
index 05da433..554f6bb 100644
--- a/core/src/parsers/pyreport/report_json.rs
+++ b/core/src/parsers/pyreport/report_json.rs
@@ -163,13 +163,12 @@
 use std::collections::{BTreeMap, HashMap};

-use serde::{de::IgnoredAny, Deserialize};
+use serde::de::IgnoredAny;
+use serde::Deserialize;
 use serde_json::Value;

-use crate::{
-    error::CodecovError,
-    report::{models, Report, ReportBuilder},
-};
+use crate::error::CodecovError;
+use crate::report::{models, Report, ReportBuilder};

 #[derive(Debug, Deserialize)]
 struct ReportJson {
diff --git a/core/src/parsers/pyreport/utils.rs b/core/src/parsers/pyreport/utils.rs
index 562c684..671ead9 100644
--- a/core/src/parsers/pyreport/utils.rs
+++ b/core/src/parsers/pyreport/utils.rs
@@ -1,15 +1,10 @@
 use super::chunks::ParseCtx;
-use crate::{
-    error::Result,
-    report::{
-        models,
-        pyreport::types::{
-            Complexity, CoverageDatapoint, LineSession, MissingBranch, Partial, PyreportCoverage,
-            ReportLine,
-        },
-        Report, ReportBuilder,
-    },
+use crate::error::Result;
+use crate::report::pyreport::types::{
+    Complexity, CoverageDatapoint, LineSession, MissingBranch, Partial, PyreportCoverage,
+    ReportLine,
 };
+use crate::report::{models, Report, ReportBuilder};

 fn separate_pyreport_complexity(complexity: &Complexity) -> (Option<i64>, Option<i64>) {
     let (covered, total) = match complexity {
diff --git a/core/src/report/pyreport/chunks.rs b/core/src/report/pyreport/chunks.rs
index 8a75de1..d6907f1 100644
--- a/core/src/report/pyreport/chunks.rs
+++ b/core/src/report/pyreport/chunks.rs
@@ -3,10 +3,9 @@ use std::io::Write;
 use serde_json::{json, Number as JsonNumber, Value as JsonVal};

 use super::{CHUNKS_FILE_END_OF_CHUNK, CHUNKS_FILE_HEADER_TERMINATOR};
-use crate::{
-    error::{CodecovError, Result},
-    report::{models, sqlite::json_value_from_sql, SqliteReport},
-};
+use crate::error::{CodecovError, Result};
+use crate::report::sqlite::json_value_from_sql;
+use crate::report::{models, SqliteReport};

 /// To save space, trailing nulls are removed from arrays in `ReportLine`s.
 ///
diff --git a/core/src/report/pyreport/mod.rs b/core/src/report/pyreport/mod.rs
index 2bb2362..4499ed5 100644
--- a/core/src/report/pyreport/mod.rs
+++ b/core/src/report/pyreport/mod.rs
@@ -256,10 +256,8 @@
  * - [`CoverageDatapoint`](https://github.com/codecov/shared/blob/f6c2c3852530192ab0c6b9fd0c0a800c2cbdb16f/shared/reports/types.py#L98)
  */

-use std::{
-    fs::File,
-    io::{BufWriter, Write},
-};
+use std::fs::File;
+use std::io::{BufWriter, Write};

 use super::SqliteReport;
 use crate::error::Result;
diff --git a/core/src/report/pyreport/report_json.rs b/core/src/report/pyreport/report_json.rs
index d83fcce..ea214c2 100644
--- a/core/src/report/pyreport/report_json.rs
+++ b/core/src/report/pyreport/report_json.rs
@@ -2,10 +2,9 @@ use std::io::Write;

 use serde_json::{json, Value as JsonVal};

-use crate::{
-    error::Result,
-    report::{models, sqlite::json_value_from_sql, SqliteReport},
-};
+use crate::error::Result;
+use crate::report::sqlite::json_value_from_sql;
+use crate::report::{models, SqliteReport};

 /// Coverage percentages are written with 5 decimal places of precision unless
 /// they are 0 or 100.
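A quick sketch of the precision rule that doc comment describes — an assumed shape for illustration, not the crate's actual formatting code:

```rust
// Per the doc comment above: 0 and 100 are written bare; every other
// percentage gets five decimal places.
fn format_pct(pct: f64) -> String {
    if pct == 0.0 {
        "0".into()
    } else if pct == 100.0 {
        "100".into()
    } else {
        format!("{pct:.5}")
    }
}

fn main() {
    assert_eq!(format_pct(62.5), "62.50000");
    assert_eq!(format_pct(100.0), "100");
}
```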
diff --git a/core/src/report/sqlite/mod.rs b/core/src/report/sqlite/mod.rs
index 7990682..1511a56 100644
--- a/core/src/report/sqlite/mod.rs
+++ b/core/src/report/sqlite/mod.rs
@@ -6,7 +6,8 @@
  * - Some `ORDER BY` clauses are to make writing test cases simple and may
  *   not be necessary
  */
-use std::{path::PathBuf, sync::LazyLock};
+use std::path::PathBuf;
+use std::sync::LazyLock;

 use include_dir::{include_dir, Dir};
 use rusqlite::Connection;
diff --git a/core/src/report/sqlite/models.rs b/core/src/report/sqlite/models.rs
index bf605b6..aa36f80 100644
--- a/core/src/report/sqlite/models.rs
+++ b/core/src/report/sqlite/models.rs
@@ -556,13 +556,9 @@ mod tests {
     use serde_json::json;
     use tempfile::TempDir;

-    use super::{
-        super::{
-            super::{Report, ReportBuilder},
-            SqliteReport, SqliteReportBuilder,
-        },
-        *,
-    };
+    use super::super::super::{Report, ReportBuilder};
+    use super::super::{SqliteReport, SqliteReportBuilder};
+    use super::*;

     #[derive(PartialEq, Debug)]
     struct TestModel {
diff --git a/core/src/report/sqlite/report.rs b/core/src/report/sqlite/report.rs
index c39bd96..d883178 100644
--- a/core/src/report/sqlite/report.rs
+++ b/core/src/report/sqlite/report.rs
@@ -1,12 +1,11 @@
-use std::{fmt, path::PathBuf};
+use std::fmt;
+use std::path::PathBuf;

 use rusqlite::{Connection, OptionalExtension};

 use super::open_database;
-use crate::{
-    error::Result,
-    report::{models, Report},
-};
+use crate::error::Result;
+use crate::report::{models, Report};

 pub struct SqliteReport {
     pub filename: PathBuf,
@@ -182,7 +181,8 @@ mod tests {
     use rusqlite_migration::SchemaVersion;
     use tempfile::TempDir;

-    use super::{super::SqliteReportBuilder, *};
+    use super::super::SqliteReportBuilder;
+    use super::*;
     use crate::report::ReportBuilder;

     struct Ctx {
diff --git a/core/src/report/sqlite/report_builder.rs b/core/src/report/sqlite/report_builder.rs
index 3ccbf2e..d67ab8f 100644
--- a/core/src/report/sqlite/report_builder.rs
+++ b/core/src/report/sqlite/report_builder.rs
@@ -1,16 +1,13 @@
-use std::{
-    ops::RangeFrom,
-    path::{Path, PathBuf},
-};
+use std::ops::RangeFrom;
+use std::path::{Path, PathBuf};

 use rand::Rng;
 use rusqlite::{Connection, Transaction};

-use super::{models::Insertable, open_database, SqliteReport};
-use crate::{
-    error::{CodecovError, Result},
-    report::{models, ReportBuilder},
-};
+use super::models::Insertable;
+use super::{open_database, SqliteReport};
+use crate::error::{CodecovError, Result};
+use crate::report::{models, ReportBuilder};

 /// Returned by [`SqliteReportBuilder::transaction`]. Contains the actual
 /// implementation for most of the [`ReportBuilder`] trait except for `build()`
diff --git a/core/src/test_utils/sqlite_report.rs b/core/src/test_utils/sqlite_report.rs
index f343ece..9a6f57f 100644
--- a/core/src/test_utils/sqlite_report.rs
+++ b/core/src/test_utils/sqlite_report.rs
@@ -2,10 +2,9 @@ use std::path::PathBuf;

 use serde_json::json;

-use crate::{
-    error::Result,
-    report::{models, sqlite::Insertable, ReportBuilder, SqliteReport, SqliteReportBuilder},
-};
+use crate::error::Result;
+use crate::report::sqlite::Insertable;
+use crate::report::{models, ReportBuilder, SqliteReport, SqliteReportBuilder};

 pub fn build_sample_report(path: PathBuf) -> Result<SqliteReport> {
     let mut builder = SqliteReportBuilder::open(path)?;
diff --git a/core/src/test_utils/test_report.rs b/core/src/test_utils/test_report.rs
index 5370b4b..c83fc93 100644
--- a/core/src/test_utils/test_report.rs
+++ b/core/src/test_utils/test_report.rs
@@ -1,13 +1,9 @@
-use crate::{
-    error,
-    report::{
-        models::{
-            BranchesData, Context, ContextAssoc, CoverageSample, MethodData, RawUpload,
-            ReportTotals, SourceFile, SpanData,
-        },
-        Report, ReportBuilder,
-    },
+use crate::error;
+use crate::report::models::{
+    BranchesData, Context, ContextAssoc, CoverageSample, MethodData, RawUpload, ReportTotals,
+    SourceFile, SpanData,
 };
+use crate::report::{Report, ReportBuilder};

 #[derive(Default)]
 pub struct TestReport {
diff --git a/core/tests/test_pyreport_shim.rs b/core/tests/test_pyreport_shim.rs
index de0aad3..c5c1ddc 100644
--- a/core/tests/test_pyreport_shim.rs
+++ b/core/tests/test_pyreport_shim.rs
@@ -1,17 +1,17 @@
-use std::{collections::HashMap, fs::File, io::Seek, path::PathBuf};
-
-use codecov_rs::{
-    parsers::pyreport::{
-        self, chunks,
-        report_json::{self, ParsedReportJson},
-    },
-    report::{models, pyreport::ToPyreport, Report, ReportBuilder, SqliteReportBuilder},
-};
+use std::collections::HashMap;
+use std::fs::File;
+use std::io::Seek;
+use std::path::PathBuf;
+
+use codecov_rs::parsers::pyreport::report_json::{self, ParsedReportJson};
+use codecov_rs::parsers::pyreport::{self, chunks};
+use codecov_rs::report::pyreport::ToPyreport;
+use codecov_rs::report::{models, Report, ReportBuilder, SqliteReportBuilder};
 use serde_json::json;
 use tempfile::TempDir;
-use test_utils::fixtures::{
-    open_fixture, read_fixture, FixtureFormat::Pyreport, FixtureSize::Small,
-};
+use test_utils::fixtures::FixtureFormat::Pyreport;
+use test_utils::fixtures::FixtureSize::Small;
+use test_utils::fixtures::{open_fixture, read_fixture};

 struct Ctx {
     temp_dir: TempDir,
diff --git a/rust-toolchain.toml b/rust-toolchain.toml
deleted file mode 100644
index 5d56faf..0000000
--- a/rust-toolchain.toml
+++ /dev/null
@@ -1,2 +0,0 @@
-[toolchain]
-channel = "nightly"
diff --git a/rustfmt.toml b/rustfmt.toml
deleted file mode 100644
index fcdcca3..0000000
--- a/rustfmt.toml
+++ /dev/null
@@ -1,3 +0,0 @@
-group_imports="StdExternalCrate"
-imports_granularity="Crate"
-wrap_comments=true
diff --git a/test_utils/src/fixtures.rs b/test_utils/src/fixtures.rs
index e917cf2..5469317 100644
--- a/test_utils/src/fixtures.rs
+++ b/test_utils/src/fixtures.rs
@@ -1,9 +1,7 @@
-use std::{
-    fmt,
-    fs::File,
-    io::{Read, Seek},
-    path::PathBuf,
-};
+use std::fmt;
+use std::fs::File;
+use std::io::{Read, Seek};
+use std::path::PathBuf;

 #[derive(Copy, Clone)]
 pub enum FixtureFormat {
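The deleted `rustfmt.toml` options (`group_imports`, `imports_granularity`, `wrap_comments`) are unstable and only work on nightly rustfmt, which is why the import reformatting throughout this patch is done by hand. Roughly, the before/after style:

```rust
// Style produced by the deleted nightly-only option
// imports_granularity = "Crate": one `use` per crate, paths collapsed.
mod crate_granularity {
    pub use std::{fmt::Debug, fs::File, path::PathBuf};
}

// Hand-written stable equivalent used throughout this patch: one module
// path per `use`, still grouped std / external / crate in the spirit of
// group_imports = "StdExternalCrate".
mod module_granularity {
    pub use std::fmt::Debug;
    pub use std::fs::File;
    pub use std::path::PathBuf;
}
```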
From 56391d0432fd457760cca67a2bfb840b105ceaca Mon Sep 17 00:00:00 2001
From: Arpad Borsos
Date: Tue, 12 Nov 2024 14:50:02 +0100
Subject: [PATCH 9/9] fix CI failures

---
 .github/workflows/ci.yml            |  2 ++
 core/src/parsers/pyreport/chunks.rs |  2 ++
 core/src/parsers/pyreport/mod.rs    | 10 ++++++----
 3 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 6290912..0a80352 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -58,6 +58,8 @@ jobs:
     runs-on: ubuntu-latest
     steps:
      - uses: actions/checkout@v4
+        with:
+          lfs: true

      - run: rustup toolchain install stable --profile minimal --no-self-update
      - uses: Swatinem/rust-cache@v2
diff --git a/core/src/parsers/pyreport/chunks.rs b/core/src/parsers/pyreport/chunks.rs
index ddbfb2c..ab569b6 100644
--- a/core/src/parsers/pyreport/chunks.rs
+++ b/core/src/parsers/pyreport/chunks.rs
@@ -46,6 +46,8 @@ use serde::{de, Deserialize};
 use super::report_json::ParsedReportJson;
 use super::utils;
 use crate::error::CodecovError;
+#[cfg(doc)]
+use crate::report::models;
 use crate::report::pyreport::types::{
     self, CoverageType, MissingBranch, Partial, PyreportCoverage, ReportLine,
 };
diff --git a/core/src/parsers/pyreport/mod.rs b/core/src/parsers/pyreport/mod.rs
index baee7f0..3b07289 100644
--- a/core/src/parsers/pyreport/mod.rs
+++ b/core/src/parsers/pyreport/mod.rs
@@ -3,6 +3,8 @@ use std::fs::File;

 use memmap2::Mmap;

 use crate::error::Result;
+#[cfg(doc)]
+use crate::report::models;
 use crate::report::SqliteReportBuilder;

 pub mod chunks;
@@ -18,14 +20,14 @@ mod utils;
 /// - Chunks file, which describes line-by-line coverage data for each file
 ///
 /// The parser for the report JSON inserts a
-/// [`crate::report::models::SourceFile`] for each file
-/// and a [`crate::report::models::Context`] for each session. It returns two
+/// [`SourceFile`](models::SourceFile) for each file
+/// and a [`Context`](models::Context) for each session. It returns two
 /// hashmaps: one which maps each file's "chunk index" to the database PK for
-/// the `SourceFile` that was inserted for it, and one which maps each session's
+/// the [`SourceFile`](models::SourceFile) that was inserted for it, and one which maps each session's
 /// "session_id" to the database PK for the `Context` that was inserted for it.
 ///
 /// The parser for the chunks file inserts a
-/// [`crate::report::models::CoverageSample`] (and possibly other records) for
+/// [`CoverageSample`](models::CoverageSample) (and possibly other records) for
 /// each coverage measurement contained in the chunks file. It uses the
 /// results of the report JSON parser to figure out the appropriate FKs to
 /// associate a measurement with its `SourceFile` and `Context`(s).
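End to end, the two parsers described in that doc comment are driven roughly like `core/examples/parse_pyreport.rs` does. A condensed sketch — the exact signature is inferred from the example's imports above, and the file paths are placeholders:

```rust
use std::fs::File;
use std::path::PathBuf;

use codecov_rs::parsers::pyreport::parse_pyreport;
use codecov_rs::report::SqliteReportBuilder;

fn main() {
    let report_json = File::open("report_json.json").unwrap();
    let chunks = File::open("chunks.txt").unwrap();
    let mut builder = SqliteReportBuilder::open(PathBuf::from("report.sqlite")).unwrap();

    // Report JSON first (it produces the chunk-index and session maps the
    // chunks parser needs to assign FKs), then the chunks file.
    parse_pyreport(&report_json, &chunks, &mut builder).unwrap();
    let _report = builder.build().unwrap();
}
```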