From 618e703a292d292c76c0395d447e24281afa33e1 Mon Sep 17 00:00:00 2001
From: Arpad Borsos <arpad.borsos@sentry.io>
Date: Tue, 17 Dec 2024 14:50:02 +0100
Subject: [PATCH] finish up most of the python bindings

---
 benches/binary.rs         |   1 +
 src/binary/bindings.rs    | 108 ++++++++++++++++++++++++++++++++------
 src/binary/format.rs      |  27 +++++-----
 src/binary/mod.rs         |  44 ++++++++--------
 src/lib.rs                |   5 ++
 tests/test_aggregation.py |  45 ++++++++++++++++
 6 files changed, 179 insertions(+), 51 deletions(-)
 create mode 100644 tests/test_aggregation.py
diff --git a/benches/binary.rs b/benches/binary.rs
index 17fcf3e..065ecff 100644
--- a/benches/binary.rs
+++ b/benches/binary.rs
@@ -38,6 +38,7 @@ fn binary(c: &mut Criterion) {
             b.iter(|| {
                 let parsed = TestAnalytics::parse(&buf, 0).unwrap();
                 for test in parsed.tests(0..60, None).unwrap() {
+                    let test = test.unwrap();
                     let _name = black_box(test.name().unwrap());
                     let _aggregates = black_box(test.aggregates());
                 }
diff --git a/src/binary/bindings.rs b/src/binary/bindings.rs
index 4dca908..b600e78 100644
--- a/src/binary/bindings.rs
+++ b/src/binary/bindings.rs
@@ -9,21 +9,33 @@ use super::{TestAnalytics, TestAnalyticsWriter};
 
 #[pyclass]
 pub struct BinaryFormatWriter {
-    writer: TestAnalyticsWriter,
+    writer: Option<TestAnalyticsWriter>,
 }
 
+#[pymethods]
 impl BinaryFormatWriter {
+    #[new] // registers this function as the Python-side constructor
     pub fn new() -> Self {
         Self {
-            writer: TestAnalyticsWriter::new(60),
+            writer: Some(TestAnalyticsWriter::new(60)), // becomes `None` once `serialize` ran
         }
     }
+
+    #[staticmethod] // callable from Python without an instance
+    pub fn open(buffer: &[u8]) -> anyhow::Result<Self> { // writer seeded from serialized bytes
+        let format = TestAnalytics::parse(buffer, 0)?; // NOTE(review): confirm `0` is intended
+        let writer = TestAnalyticsWriter::from_existing_format(&format)?; // errors bubble up
+        Ok(Self {
+            writer: Some(writer),
+        })
+    }
+
     pub fn add_testruns(
         &mut self,
         timestamp: u32,
         commit_hash: &str,
-        flags: &[&str],
-        testruns: &[Testrun],
+        flags: Vec<String>,
+        testruns: Vec<Testrun>,
     ) -> anyhow::Result<()> {
         let commit_hash_base16 = if commit_hash.len() > 40 {
             commit_hash
@@ -35,29 +47,61 @@ impl BinaryFormatWriter {
         let mut commit_hash = super::CommitHash::default();
         base16ct::mixed::decode(commit_hash_base16, &mut commit_hash.0)?;
 
-        let mut session = self.writer.start_session(timestamp, commit_hash, flags);
+        let writer = self
+            .writer
+            .as_mut()
+            .context("writer was already serialized")?;
+
+        let flags: Vec<_> = flags.iter().map(|s| s.as_str()).collect();
+        let mut session = writer.start_session(timestamp, commit_hash, &flags);
         for test in testruns {
-            session.insert(test);
+            session.insert(&test);
         }
         Ok(())
     }
 
-    pub fn serialize(self) -> anyhow::Result<Vec<u8>> {
+    pub fn serialize(&mut self) -> anyhow::Result<Vec<u8>> { // one-shot, see `take()` below
+        let writer = self
+            .writer
+            .take() // leaves `None` behind, so every later call on this object errors out
+            .context("writer was already serialized")?;
         let mut buffer = vec![];
-        self.writer.serialize(&mut buffer)?;
+        writer.serialize(&mut buffer)?; // writes the serialized bytes into `buffer`
         Ok(buffer)
     }
 }
 
 #[pyclass]
 pub struct AggregationReader {
-    buffer: Vec<u8>,
+    _buffer: Vec<u8>,
     format: TestAnalytics<'static>,
 }
 
-#[pyclass]
+#[pyclass(get_all)] // `get_all` generates a Python getter for every field below
 pub struct TestAggregate {
-    // TODO
+    pub name: String,
+    // TODO: `test_id` is currently filled with the placeholder string "TODO".
+    pub test_id: String,
+
+    pub testsuite: Option<String>,
+    pub flags: Vec<String>,
+
+    pub failure_rate: f32,
+    pub flake_rate: f32,
+
+    // TODO: `updated_at` is currently always reported as 0.
+    pub updated_at: u32,
+    pub avg_duration: f64,
+
+    pub total_fail_count: u32,
+    pub total_flaky_fail_count: u32,
+    pub total_pass_count: u32,
+    pub total_skip_count: u32,
+
+    pub commits_where_fail: usize,
+
+    // TODO: `last_duration` is currently always reported as 0.
+    pub last_duration: f32,
 }
 
 #[pymethods]
@@ -69,16 +113,48 @@ impl AggregationReader {
         // which we do not mutate, and which outlives the parsed format.
         let format = unsafe { transmute(format) };
 
-        Ok(Self { buffer, format })
+        Ok(Self {
+            _buffer: buffer,
+            format,
+        })
     }
 
-    #[pyo3(signature = (interval_start, interval_end, flag=None))]
+    #[pyo3(signature = (interval_start, interval_end, flags=None))] // `flags` is optional
     pub fn get_test_aggregates(
         &self,
         interval_start: usize,
         interval_end: usize,
-        flag: Option<&str>,
-    ) -> Vec<TestAggregate> {
-        vec![]
+        flags: Option<Vec<String>>,
+    ) -> anyhow::Result<Vec<TestAggregate>> { // one `TestAggregate` per test yielded below
+        let flags: Option<Vec<_>> = flags // re-borrow the owned strings as `&str`
+            .as_ref()
+            .map(|flags| flags.iter().map(|flag| flag.as_str()).collect());
+        let desired_range = interval_start..interval_end; // half-open: end is exclusive
+
+        let tests = self.format.tests(desired_range, flags.as_deref())?; // may fail up front
+        let mut collected_tests = vec![];
+
+        for test in tests {
+            let test = test?; // items are `Result`s; the first error aborts the whole call
+
+            collected_tests.push(TestAggregate {
+                name: test.name()?.into(),
+                test_id: "TODO".into(), // placeholder, no real id is produced yet
+                testsuite: Some(test.testsuite()?.into()), // always `Some`, never `None`
+                flags: test.flags()?.into_iter().map(|s| s.into()).collect(),
+                failure_rate: test.aggregates().failure_rate,
+                flake_rate: test.aggregates().flake_rate,
+                updated_at: 0, // TODO: hard-coded placeholder
+                avg_duration: test.aggregates().avg_duration,
+                total_fail_count: test.aggregates().total_fail_count,
+                total_flaky_fail_count: test.aggregates().total_flaky_fail_count,
+                total_pass_count: test.aggregates().total_pass_count,
+                total_skip_count: test.aggregates().total_skip_count,
+                commits_where_fail: test.aggregates().failing_commits,
+                last_duration: 0., // TODO: hard-coded placeholder
+            });
+        }
+
+        Ok(collected_tests)
     }
 }
diff --git a/src/binary/format.rs b/src/binary/format.rs
index a1951b3..b0f9a77 100644
--- a/src/binary/format.rs
+++ b/src/binary/format.rs
@@ -86,15 +86,18 @@ impl<'data> TestAnalytics<'data> {
     pub fn tests(
         &self,
         desired_range: Range<usize>,
-        flag: Option<&str>,
-    ) -> Result<impl Iterator<Item = Test<'data, '_>> + '_, TestAnalyticsError> {
-        let matching_flags_sets = if let Some(flag) = flag {
+        flags: Option<&[&str]>,
+    ) -> Result<
+        impl Iterator<Item = Result<Test<'data, '_>, TestAnalyticsError>> + '_,
+        TestAnalyticsError,
+    > {
+        let matching_flags_sets = if let Some(flags) = flags {
             let flag_sets = self.flags_set.iter(self.string_bytes);
 
             let mut matching_flags_sets: SmallVec<u32, 4> = Default::default();
             for res in flag_sets {
-                let (offset, flags) = res?;
-                if flags.contains(&flag) {
+                let (offset, flag_set) = res?;
+                if flags.iter().any(|flag| flag_set.contains(&flag.as_ref())) {
                     matching_flags_sets.push(offset);
                 }
             }
@@ -132,11 +135,11 @@ impl<'data> TestAnalytics<'data> {
                 &self.testdata[adjusted_range],
             );
 
-            Some(Test {
+            Some(aggregates.map(|aggregates| Test {
                 container: self,
                 data: test,
                 aggregates,
-            })
+            }))
         });
         Ok(tests)
     }
@@ -211,7 +214,7 @@ impl Aggregates {
         commithashes_bytes: &[u8],
         all_failing_commits: &mut HashSet<CommitHash>,
         data: &[raw::TestData],
-    ) -> Self {
+    ) -> Result<Self, TestAnalyticsError> {
         let mut total_pass_count = 0;
         let mut total_fail_count = 0;
         let mut total_skip_count = 0;
@@ -225,10 +228,8 @@ impl Aggregates {
             total_flaky_fail_count += testdata.total_flaky_fail_count as u32;
             total_duration += testdata.total_duration as f64;
 
-            // TODO: make sure we validate this data ahead of time!
             let failing_commits =
-                CommitHashesSet::read_raw(commithashes_bytes, testdata.failing_commits_set)
-                    .unwrap();
+                CommitHashesSet::read_raw(commithashes_bytes, testdata.failing_commits_set)?;
             all_failing_commits.extend(failing_commits);
         }
 
@@ -246,7 +247,7 @@ impl Aggregates {
             (0., 0., 0.)
         };
 
-        Aggregates {
+        Ok(Aggregates {
             total_pass_count,
             total_fail_count,
             total_skip_count,
@@ -258,6 +259,6 @@ impl Aggregates {
             avg_duration,
 
             failing_commits,
-        }
+        })
     }
 }
diff --git a/src/binary/mod.rs b/src/binary/mod.rs
index 01ccb95..743efc2 100644
--- a/src/binary/mod.rs
+++ b/src/binary/mod.rs
@@ -7,7 +7,7 @@ mod raw;
 mod timestamps;
 mod writer;
 
-pub use bindings::{AggregationReader, BinaryFormatWriter};
+pub use bindings::{AggregationReader, BinaryFormatWriter, TestAggregate};
 pub use error::{TestAnalyticsError, TestAnalyticsErrorKind};
 pub use format::{Test, TestAnalytics};
 pub use raw::CommitHash;
@@ -71,14 +71,14 @@ mod tests {
         let parsed = TestAnalytics::parse(&buf, 0).unwrap();
         let mut tests = parsed.tests(0..60, None).unwrap();
 
-        let abc = tests.next().unwrap();
+        let abc = tests.next().unwrap().unwrap();
         assert_eq!(abc.name().unwrap(), "abc");
         let aggregates = abc.aggregates();
         assert_eq!(aggregates.total_pass_count, 1);
         assert_eq!(aggregates.total_fail_count, 1);
         assert_eq!(aggregates.avg_duration, 1.5);
 
-        let abc = tests.next().unwrap();
+        let abc = tests.next().unwrap().unwrap();
         assert_eq!(abc.name().unwrap(), "def");
         let aggregates = abc.aggregates();
         assert_eq!(aggregates.total_skip_count, 1);
@@ -103,11 +103,11 @@ mod tests {
         let parsed = TestAnalytics::parse(&buf, 0).unwrap();
         let mut tests = parsed.tests(0..60, None).unwrap();
 
-        let abc = tests.next().unwrap();
+        let abc = tests.next().unwrap().unwrap();
         assert_eq!(abc.testsuite().unwrap(), "");
         assert_eq!(abc.name().unwrap(), "abc");
 
-        let abc_with_testsuite = tests.next().unwrap();
+        let abc_with_testsuite = tests.next().unwrap().unwrap();
         assert_eq!(abc_with_testsuite.testsuite().unwrap(), "some testsuite");
         assert_eq!(abc_with_testsuite.name().unwrap(), "abc");
 
@@ -131,7 +131,7 @@ mod tests {
         let parsed = TestAnalytics::parse(&buf, 0).unwrap();
         let mut tests = parsed.tests(0..1, None).unwrap();
 
-        let abc = tests.next().unwrap();
+        let abc = tests.next().unwrap().unwrap();
         assert_eq!(abc.name().unwrap(), "abc");
         let aggregates = abc.aggregates();
         assert_eq!(aggregates.total_pass_count, 1);
@@ -149,7 +149,7 @@ mod tests {
         // the data should be in the "yesterday" bucket
         let mut tests = parsed.tests(1..2, None).unwrap();
 
-        let abc = tests.next().unwrap();
+        let abc = tests.next().unwrap().unwrap();
         assert_eq!(abc.name().unwrap(), "abc");
         let aggregates = abc.aggregates();
         assert_eq!(aggregates.total_pass_count, 1);
@@ -183,7 +183,7 @@ mod tests {
 
         // we should have data in the "today" bucket
         let mut tests = parsed.tests(0..1, None).unwrap();
-        let abc = tests.next().unwrap();
+        let abc = tests.next().unwrap().unwrap();
         assert_eq!(abc.name().unwrap(), "abc");
         let aggregates = abc.aggregates();
         assert_eq!(aggregates.total_pass_count, 1);
@@ -192,7 +192,7 @@ mod tests {
 
         // as well as in the "yesterday" bucket
         let mut tests = parsed.tests(1..2, None).unwrap();
-        let abc = tests.next().unwrap();
+        let abc = tests.next().unwrap().unwrap();
         assert_eq!(abc.name().unwrap(), "abc");
         let aggregates = abc.aggregates();
         assert_eq!(aggregates.total_pass_count, 1);
@@ -233,7 +233,7 @@ mod tests {
 
         // we should have data in the "today" bucket
         let mut tests = parsed.tests(0..1, None).unwrap();
-        let abc = tests.next().unwrap();
+        let abc = tests.next().unwrap().unwrap();
         assert_eq!(abc.name().unwrap(), "abc");
         let aggregates = abc.aggregates();
         assert_eq!(aggregates.total_pass_count, 1);
@@ -242,7 +242,7 @@ mod tests {
 
         // as well as in the "yesterday" bucket
         let mut tests = parsed.tests(1..2, None).unwrap();
-        let abc = tests.next().unwrap();
+        let abc = tests.next().unwrap().unwrap();
         assert_eq!(abc.name().unwrap(), "abc");
         let aggregates = abc.aggregates();
         assert_eq!(aggregates.total_pass_count, 1);
@@ -279,7 +279,7 @@ mod tests {
         // nothing garbage collected yet,
         // we should have data in the "yesterday" bucket
         let mut tests = parsed.tests(1..2, None).unwrap();
-        let abc = tests.next().unwrap();
+        let abc = tests.next().unwrap().unwrap();
         assert_eq!(abc.name().unwrap(), "abc");
         let aggregates = abc.aggregates();
         assert_eq!(aggregates.total_pass_count, 1);
@@ -319,25 +319,25 @@ mod tests {
         let mut tests = parsed.tests(0..60, None).unwrap();
 
         // we get the test twice, with two different flags
-        let abc = tests.next().unwrap();
+        let abc = tests.next().unwrap().unwrap();
         assert_eq!(abc.name().unwrap(), "abc");
         assert_eq!(abc.flags().unwrap(), &["flag-a"]);
 
-        let abc = tests.next().unwrap();
+        let abc = tests.next().unwrap().unwrap();
         assert_eq!(abc.name().unwrap(), "abc");
         assert_eq!(abc.flags().unwrap(), &["flag-b"]);
 
         assert!(tests.next().is_none());
 
         // if we filter for flags, we get only matching tests:
-        let mut tests = parsed.tests(0..60, Some("flag-a")).unwrap();
+        let mut tests = parsed.tests(0..60, Some(&["flag-a"])).unwrap();
 
-        let abc = tests.next().unwrap();
+        let abc = tests.next().unwrap().unwrap();
         assert_eq!(abc.name().unwrap(), "abc");
         assert_eq!(abc.flags().unwrap(), &["flag-a"]);
         assert!(tests.next().is_none());
 
-        let mut tests = parsed.tests(0..60, Some("non-existing")).unwrap();
+        let mut tests = parsed.tests(0..60, Some(&["non-existing"])).unwrap();
         assert!(tests.next().is_none());
     }
 
@@ -364,13 +364,13 @@ mod tests {
 
         // when filtering for "yesterday", we get valid data
         let mut tests = parsed.tests(1..2, None).unwrap();
-        let abc = tests.next().unwrap();
+        let abc = tests.next().unwrap().unwrap();
         assert_eq!(abc.name().unwrap(), "abc");
         assert!(tests.next().is_none());
 
         // also when filtering for two days prior to that
         let mut tests = parsed.tests(2..4, None).unwrap();
-        let abc = tests.next().unwrap();
+        let abc = tests.next().unwrap().unwrap();
         assert_eq!(abc.name().unwrap(), "abc");
         assert!(tests.next().is_none());
 
@@ -425,17 +425,17 @@ mod tests {
         let parsed = TestAnalytics::parse(&buf, 3 * DAY).unwrap();
 
         let mut tests = parsed.tests(0..1, None).unwrap();
-        let abc = tests.next().unwrap();
+        let abc = tests.next().unwrap().unwrap();
         assert_eq!(abc.aggregates().failing_commits, 1); // commit 4
         assert!(tests.next().is_none());
 
         let mut tests = parsed.tests(2..3, None).unwrap();
-        let abc = tests.next().unwrap();
+        let abc = tests.next().unwrap().unwrap();
         assert_eq!(abc.aggregates().failing_commits, 2); // commit 1, commit 2
         assert!(tests.next().is_none());
 
         let mut tests = parsed.tests(0..60, None).unwrap();
-        let abc = tests.next().unwrap();
+        let abc = tests.next().unwrap().unwrap();
         assert_eq!(abc.aggregates().failing_commits, 4); // commit 1 - 4
         assert!(tests.next().is_none());
     }
diff --git a/src/lib.rs b/src/lib.rs
index 4750ae8..b1ee782 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -25,5 +25,10 @@ fn test_results_parser(py: Python, m: &Bound<PyModule>) -> PyResult<()> {
     m.add_function(wrap_pyfunction!(failure_message::build_message, m)?)?;
     m.add_function(wrap_pyfunction!(failure_message::escape_message, m)?)?;
     m.add_function(wrap_pyfunction!(failure_message::shorten_file_paths, m)?)?;
+
+    m.add_class::<binary::AggregationReader>()?;
+    m.add_class::<binary::BinaryFormatWriter>()?;
+    m.add_class::<binary::TestAggregate>()?;
+
     Ok(())
 }
diff --git a/tests/test_aggregation.py b/tests/test_aggregation.py
new file mode 100644
index 0000000..9c0e47b
--- /dev/null
+++ b/tests/test_aggregation.py
@@ -0,0 +1,45 @@
+from datetime import datetime, timezone
+
+from test_results_parser import (
+    parse_junit_xml,
+    AggregationReader,
+    BinaryFormatWriter,
+)
+
+def test_aggregation():  # round trip: parse junit -> write -> serialize -> read aggregates
+    with open("./tests/junit.xml", "br") as f:
+        junit_file = f.read()
+    parsed = parse_junit_xml(junit_file)
+
+    now = int(datetime.now(timezone.utc).timestamp())  # current UTC time, whole seconds
+
+    writer = BinaryFormatWriter()
+    writer.add_testruns(
+        timestamp=now,
+        commit_hash="e9fcd08652d091fa0c8d28e323c24fb0f4acf249",
+        flags=["upload", "flags"],
+        testruns=parsed.testruns,
+    )
+
+    serialized = writer.serialize()  # the writer cannot be used again after this call
+    reader = AggregationReader(serialized, now)  # read back with the same timestamp
+
+    tests = reader.get_test_aggregates(0, 2)  # interval_start=0, interval_end=2, no flags
+    for test in tests:
+        test_dict = {
+            "name": test.name,
+            "test_id": test.test_id,  # TODO: the bindings return a placeholder for now
+            "testsuite": test.testsuite,
+            "flags": test.flags,
+            "failure_rate": test.failure_rate,
+            "flake_rate": test.flake_rate,
+            "updated_at":test.updated_at,  # TODO: the bindings always return 0 for now
+            "avg_duration":test.avg_duration,
+            "total_fail_count":test.total_fail_count,
+            "total_flaky_fail_count":test.total_flaky_fail_count,
+            "total_pass_count":test.total_pass_count,
+            "total_skip_count":test.total_skip_count,
+            "commits_where_fail":test.commits_where_fail,
+            "last_duration":test.last_duration,  # TODO: the bindings always return 0 for now
+        }
+        print(test_dict)  # NOTE(review): nothing is asserted yet; values are only printed
\ No newline at end of file