Skip to content

Commit

Permalink
Implement missing chunks parser features
Browse files Browse the repository at this point in the history
This should implement everything except for the `complexity` parser.
  • Loading branch information
Swatinem committed Nov 12, 2024
1 parent 596c305 commit 23bdede
Show file tree
Hide file tree
Showing 6 changed files with 161 additions and 182 deletions.
92 changes: 13 additions & 79 deletions core/benches/pyreport.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
use std::collections::HashMap;

use codecov_rs::{
parsers::pyreport::{chunks, chunks_serde, report_json},
test_utils::test_report::{TestReport, TestReportBuilder},
parsers::pyreport::{chunks_serde, report_json},
test_utils::test_report::TestReportBuilder,
};
use criterion::{criterion_group, criterion_main, Criterion};
use test_utils::fixtures::{read_fixture, FixtureFormat::Pyreport, FixtureSize::Large};
use winnow::Parser as _;

criterion_group!(
benches,
Expand Down Expand Up @@ -55,24 +54,25 @@ fn parse_report_json(input: &[u8]) -> report_json::ParsedReportJson {
}

fn simple_chunks(c: &mut Criterion) {
let chunks = &[
let chunks: &[&[u8]] = &[
// Header and one chunk with an empty line
"{}\n<<<<< end_of_header >>>>>\n{}\n",
b"{}\n<<<<< end_of_header >>>>>\n{}\n",
// No header, one chunk with a populated line and an empty line
"{}\n[1, null, [[0, 1]]]\n",
b"{}\n[1, null, [[0, 1]]]\n",
// No header, two chunks, the second having just one empty line
"{}\n[1, null, [[0, 1]]]\n\n<<<<< end_of_chunk >>>>>\n{}\n",
b"{}\n[1, null, [[0, 1]]]\n\n<<<<< end_of_chunk >>>>>\n{}\n",
// Header, two chunks, the second having multiple data lines and an empty line
"{}\n<<<<< end_of_header >>>>>\n{}\n[1, null, [[0, 1]]]\n\n<<<<< end_of_chunk >>>>>\n{}\n[1, null, [[0, 1]]]\n[1, null, [[0, 1]]]\n",
b"{}\n<<<<< end_of_header >>>>>\n{}\n[1, null, [[0, 1]]]\n\n<<<<< end_of_chunk >>>>>\n{}\n[1, null, [[0, 1]]]\n[1, null, [[0, 1]]]\n",
];

let files = HashMap::from([(0, 0), (1, 1), (2, 2)]);
let sessions = HashMap::from([(0, 0), (1, 1), (2, 2)]);

let report_json = report_json::ParsedReportJson { files, sessions };
c.bench_function("simple_chunks", |b| {
b.iter(|| {
for input in chunks {
parse_chunks_file(input, files.clone(), sessions.clone())
parse_chunks_file_serde(input, report_json.clone());
}
})
});
Expand All @@ -87,7 +87,6 @@ fn complex_chunks(c: &mut Criterion) {
"worker-c71ddfd4cb1753c7a540e5248c2beaa079fc3341-chunks.txt",
)
.unwrap();
let chunks = std::str::from_utf8(&chunks).unwrap();

// parsing the chunks depends on having loaded the `report_json`
let report = read_fixture(
Expand All @@ -96,79 +95,14 @@ fn complex_chunks(c: &mut Criterion) {
"worker-c71ddfd4cb1753c7a540e5248c2beaa079fc3341-report_json.json",
)
.unwrap();
let report_json::ParsedReportJson { files, sessions } = parse_report_json(&report);
let report_json = parse_report_json(&report);

c.bench_function("complex_chunks", |b| {
b.iter(|| parse_chunks_file(chunks, files.clone(), sessions.clone()))
b.iter(|| parse_chunks_file_serde(&chunks, report_json.clone()))
});
}

fn parse_chunks_file(input: &str, files: HashMap<usize, i64>, sessions: HashMap<usize, i64>) {
fn parse_chunks_file_serde(input: &[u8], report_json: report_json::ParsedReportJson) {
let report_builder = TestReportBuilder::default();

let chunks_ctx = chunks::ParseCtx::new(report_builder, files, sessions);
let mut chunks_stream = chunks::ReportOutputStream::<&str, TestReport, TestReportBuilder> {
input,
state: chunks_ctx,
};

chunks::parse_chunks_file
.parse_next(&mut chunks_stream)
.unwrap();
}

#[divan::bench]
fn simple_chunks_serde() {
let chunks: &[&[u8]] = &[
// Header and one chunk with an empty line
b"{}\n<<<<< end_of_header >>>>>\n{}\n",
// No header, one chunk with a populated line and an empty line
b"{}\n[1, null, [[0, 1]]]\n",
// No header, two chunks, the second having just one empty line
b"{}\n[1, null, [[0, 1]]]\n\n<<<<< end_of_chunk >>>>>\n{}\n",
// Header, two chunks, the second having multiple data lines and an empty line
b"{}\n<<<<< end_of_header >>>>>\n{}\n[1, null, [[0, 1]]]\n\n<<<<< end_of_chunk >>>>>\n{}\n[1, null, [[0, 1]]]\n[1, null, [[0, 1]]]\n",
];

let report_json = report_json::ParsedReportJson {
files: Default::default(),
sessions: Default::default(),
};

for input in chunks {
parse_chunks_file_serde(input, &report_json);
}
}

// this is currently <300 ms on my machine
#[divan::bench(sample_count = 10)]
fn complex_chunks_serde(bencher: Bencher) {
// this is a ~96M `chunks` file
let chunks =
load_fixture("pyreport/large/worker-c71ddfd4cb1753c7a540e5248c2beaa079fc3341-chunks.txt");

// parsing the chunks depends on having loaded the `report_json`
let report = load_fixture(
"pyreport/large/worker-c71ddfd4cb1753c7a540e5248c2beaa079fc3341-report_json.json",
);
let report_json = parse_report_json(&report);

bencher.bench(|| parse_chunks_file_serde(&chunks, &report_json));
}

fn parse_chunks_file_serde(input: &[u8], report_json: &report_json::ParsedReportJson) {
let mut report_builder = TestReportBuilder::default();
chunks_serde::parse_chunks_file(input, report_json, &mut report_builder).unwrap();
}

#[track_caller]
fn load_fixture(path: &str) -> Vec<u8> {
let path = format!("./fixtures/{path}");
let contents = std::fs::read(path).unwrap();

if contents.starts_with(b"version https://git-lfs.github.com/spec/v1") {
panic!("Fixture has not been pulled from Git LFS");
}

contents
chunks_serde::parse_chunks_file(input, report_json, report_builder).unwrap();
}
Loading

0 comments on commit 23bdede

Please sign in to comment.