Skip to content

Commit

Permalink
get closer to the existing parser interface dealing with report builders
Browse files Browse the repository at this point in the history
  • Loading branch information
Swatinem committed Sep 4, 2024
1 parent e0dd890 commit 12ee764
Show file tree
Hide file tree
Showing 4 changed files with 140 additions and 25 deletions.
28 changes: 17 additions & 11 deletions core/benches/pyreport.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::{collections::HashMap, hint::black_box};
use std::collections::HashMap;

use codecov_rs::{
parsers::pyreport::{chunks, chunks_serde, report_json},
Expand Down Expand Up @@ -109,8 +109,13 @@ fn simple_chunks_serde() {
b"{}\n<<<<< end_of_header >>>>>\n{}\n[1, null, [[0, 1]]]\n\n<<<<< end_of_chunk >>>>>\n{}\n[1, null, [[0, 1]]]\n[1, null, [[0, 1]]]\n",
];

let report_json = report_json::ParsedReportJson {
files: Default::default(),
sessions: Default::default(),
};

for input in chunks {
parse_chunks_file_serde(input)
parse_chunks_file_serde(input, &report_json);
}
}

Expand All @@ -121,17 +126,18 @@ fn complex_chunks_serde(bencher: Bencher) {
let chunks =
load_fixture("pyreport/large/worker-c71ddfd4cb1753c7a540e5248c2beaa079fc3341-chunks.txt");

bencher.bench(|| parse_chunks_file_serde(&chunks));
// parsing the chunks depends on having loaded the `report_json`
let report = load_fixture(
"pyreport/large/worker-c71ddfd4cb1753c7a540e5248c2beaa079fc3341-report_json.json",
);
let report_json = parse_report_json(&report);

bencher.bench(|| parse_chunks_file_serde(&chunks, &report_json));
}

fn parse_chunks_file_serde(input: &[u8]) {
let chunks_file = chunks_serde::ChunksFile::new(input).unwrap();
let mut chunks = chunks_file.chunks();
while let Some(mut chunk) = chunks.next_chunk().unwrap() {
while let Some(line) = chunk.next_line().unwrap() {
black_box(line);
}
}
/// Benchmark helper: parses `input` as a chunks file into a fresh
/// `TestReportBuilder`.
///
/// `report_json` is forwarded unchanged to `chunks_serde::parse_chunks_file`.
/// Panics (via `unwrap`) if parsing fails.
fn parse_chunks_file_serde(input: &[u8], report_json: &report_json::ParsedReportJson) {
    let mut report_builder = TestReportBuilder::default();
    chunks_serde::parse_chunks_file(input, report_json, &mut report_builder).unwrap();
    // Keep the populated builder observable so the optimizer cannot treat the
    // work done by the parse as dead code inside the benchmark loop.
    std::hint::black_box(report_builder);
}

#[track_caller]
Expand Down
5 changes: 5 additions & 0 deletions core/src/error.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
use thiserror::Error;

use crate::parsers::pyreport::chunks_serde::ChunksFileParseError;

pub type Result<T, E = CodecovError> = std::result::Result<T, E>;

#[derive(Error, Debug)]
Expand All @@ -26,4 +28,7 @@ pub enum CodecovError {
#[cfg(feature = "pyreport")]
#[error("failed to convert sqlite to pyreport: '{0}'")]
PyreportConversionError(String),

#[error(transparent)]
ChunksFileParseError(#[from] ChunksFileParseError),
}
115 changes: 101 additions & 14 deletions core/src/parsers/pyreport/chunks_serde.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,84 @@ use std::{collections::HashMap, fmt, mem, sync::OnceLock};
use memchr::{memchr, memmem};
use serde::{de, de::IgnoredAny, Deserialize};

use crate::report::pyreport::{CHUNKS_FILE_END_OF_CHUNK, CHUNKS_FILE_HEADER_TERMINATOR};
use super::report_json::ParsedReportJson;
use crate::{
error::CodecovError,
report::{
models,
pyreport::{
types::{self, PyreportCoverage, ReportLine},
CHUNKS_FILE_END_OF_CHUNK, CHUNKS_FILE_HEADER_TERMINATOR,
},
Report, ReportBuilder,
},
};

pub fn parse_chunks_file<B, R>(
input: &[u8],
_report_json: &ParsedReportJson,
builder: &mut B,
) -> Result<(), CodecovError>
where
B: ReportBuilder<R>,
R: Report,
{
let chunks_file = ChunksFile::new(input)?;

let mut labels_index = HashMap::with_capacity(chunks_file.labels_index().len());
for (index, name) in chunks_file.labels_index() {
let context = builder.insert_context(name)?;
labels_index.insert(index.clone(), context.id);
}

let mut report_lines = vec![];

let mut chunks = chunks_file.chunks();
while let Some(mut chunk) = chunks.next_chunk()? {
let mut line_no = 0;
report_lines.clear();
while let Some(line) = chunk.next_line()? {
line_no += 1;
if let Some(line) = line {
let coverage_type = match line.1.unwrap_or_default() {
CoverageType::Line => models::CoverageType::Line,
CoverageType::Branch => models::CoverageType::Branch,
CoverageType::Method => models::CoverageType::Method,
};
let sessions = line
.2
.into_iter()
.map(|session| types::LineSession {
session_id: session.0,
coverage: session.1.into(),
branches: None, // TODO
partials: None, // TODO
complexity: None, // TODO
})
.collect();

let mut report_line = ReportLine {
line_no,
coverage: line.0.into(),
coverage_type,
sessions,
_messages: None,
_complexity: None,
datapoints: None, // TODO
};
report_line.normalize();
report_lines.push(report_line);
}
}
// TODO:
// utils::save_report_lines()?;
}

Ok(())
}

#[derive(Debug, thiserror::Error)]
pub enum ParserError {
pub enum ChunksFileParseError {
#[error("unexpected EOF")]
UnexpectedEof,
#[error("unexpected input")]
Expand All @@ -53,12 +127,12 @@ pub enum ParserError {
InvalidLineRecord(#[source] serde_json::Error),
}

impl PartialEq for ParserError {
impl PartialEq for ChunksFileParseError {
    /// Two errors compare equal when they are the same variant; any payload
    /// carried by the variant is not compared.
    fn eq(&self, other: &Self) -> bool {
        let lhs = core::mem::discriminant(self);
        let rhs = core::mem::discriminant(other);
        lhs == rhs
    }
}
impl Eq for ParserError {}
impl Eq for ChunksFileParseError {}

#[derive(Debug)]
pub struct ChunksFile<'d> {
Expand All @@ -67,16 +141,16 @@ pub struct ChunksFile<'d> {
}

impl<'d> ChunksFile<'d> {
pub fn new(mut input: &'d [u8]) -> Result<Self, ParserError> {
pub fn new(mut input: &'d [u8]) -> Result<Self, ChunksFileParseError> {
static HEADER_FINDER: OnceLock<memmem::Finder> = OnceLock::new();
let header_finder =
HEADER_FINDER.get_or_init(|| memmem::Finder::new(CHUNKS_FILE_HEADER_TERMINATOR));

let file_header = if let Some(pos) = header_finder.find(input) {
let header_bytes = &input[..pos];
input = &input[pos + header_finder.needle().len()..];
let file_header: FileHeader =
serde_json::from_slice(header_bytes).map_err(ParserError::InvalidFileHeader)?;
let file_header: FileHeader = serde_json::from_slice(header_bytes)
.map_err(ChunksFileParseError::InvalidFileHeader)?;
file_header
} else {
FileHeader::default()
Expand All @@ -99,7 +173,7 @@ pub struct Chunks<'d> {
}

impl<'d> Chunks<'d> {
pub fn next_chunk(&mut self) -> Result<Option<Chunk<'d>>, ParserError> {
pub fn next_chunk(&mut self) -> Result<Option<Chunk<'d>>, ChunksFileParseError> {
if self.input.is_empty() {
return Ok(None);
}
Expand All @@ -123,9 +197,10 @@ impl<'d> Chunks<'d> {
}));
}

let header_bytes = next_line(&mut chunk_bytes).ok_or(ParserError::UnexpectedInput)?;
let chunk_header: ChunkHeader =
serde_json::from_slice(header_bytes).map_err(ParserError::InvalidFileHeader)?;
let header_bytes =
next_line(&mut chunk_bytes).ok_or(ChunksFileParseError::UnexpectedInput)?;
let chunk_header: ChunkHeader = serde_json::from_slice(header_bytes)
.map_err(ChunksFileParseError::InvalidFileHeader)?;

Ok(Some(Chunk {
chunk_header,
Expand All @@ -144,7 +219,7 @@ impl<'d> Chunk<'d> {
&self.chunk_header.present_sessions
}

pub fn next_line(&mut self) -> Result<Option<Option<LineRecord>>, ParserError> {
pub fn next_line(&mut self) -> Result<Option<Option<LineRecord>>, ChunksFileParseError> {
let Some(line) = next_line(&mut self.input) else {
return Ok(None);
};
Expand All @@ -154,7 +229,7 @@ impl<'d> Chunk<'d> {
}

let line_record: LineRecord =
serde_json::from_slice(line).map_err(ParserError::InvalidLineRecord)?;
serde_json::from_slice(line).map_err(ChunksFileParseError::InvalidLineRecord)?;
return Ok(Some(Some(line_record)));
}
}
Expand Down Expand Up @@ -217,7 +292,7 @@ pub struct LineRecord(
#[derive(Debug, Clone, PartialEq, Eq, Deserialize)]
pub struct LineSession(
/// session id
u32,
usize,
/// coverage
Coverage,
/// TODO: branches
Expand Down Expand Up @@ -260,6 +335,18 @@ pub enum Coverage {
HitCount(u32),
}

/// Converts a parsed chunks-file [`Coverage`] value into the matching
/// [`PyreportCoverage`] variant.
///
/// Implemented as `From` rather than `Into`: the standard library's blanket
/// impl still provides `Coverage: Into<PyreportCoverage>`, so existing
/// `.into()` call sites keep working, and `PyreportCoverage::from(..)`
/// becomes available as well.
impl From<Coverage> for PyreportCoverage {
    fn from(coverage: Coverage) -> Self {
        match coverage {
            Coverage::Partial => PyreportCoverage::Partial(),
            Coverage::BranchTaken(covered, total) => {
                PyreportCoverage::BranchesTaken { covered, total }
            }
            Coverage::HitCount(hits) => PyreportCoverage::HitCount(hits),
        }
    }
}

impl<'de> Deserialize<'de> for Coverage {
fn deserialize<D>(deserializer: D) -> Result<Coverage, D::Error>
where
Expand Down
17 changes: 17 additions & 0 deletions core/src/report/pyreport/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,23 @@ pub struct ReportLine {
pub datapoints: Option<Option<HashMap<u32, CoverageDatapoint>>>,
}

impl ReportLine {
    /// Rewrites this line's coverage data into its normalized form.
    ///
    /// The line-level `coverage` / `coverage_type` pair is passed through
    /// `normalize_coverage_measurement` first. Each session's `coverage` is
    /// then normalized against the already-corrected line type.
    pub fn normalize(&mut self) {
        // Line level: e.g. branch coverage recorded with `CoverageType::Method`.
        let (line_coverage, line_type) =
            normalize_coverage_measurement(&self.coverage, &self.coverage_type);
        self.coverage = line_coverage;
        self.coverage_type = line_type;

        // Session level: only the coverage value is replaced; the type
        // returned for a session is discarded.
        let line_type = &self.coverage_type;
        for session in &mut self.sessions {
            session.coverage = normalize_coverage_measurement(&session.coverage, line_type).0;
        }
    }
}

/// Account for some quirks and malformed data. See code comments for details.
pub(crate) fn normalize_coverage_measurement(
coverage: &PyreportCoverage,
Expand Down

0 comments on commit 12ee764

Please sign in to comment.