diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml index 22ba40c..a6a1681 100644 --- a/.github/workflows/bench.yml +++ b/.github/workflows/bench.yml @@ -19,6 +19,8 @@ jobs: - run: cargo binstall cargo-codspeed - uses: Swatinem/rust-cache@v2 + - uses: actions/setup-python@v5 + - run: cargo codspeed build - uses: CodSpeedHQ/action@v3 with: diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f53a868..77d4c62 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -6,13 +6,12 @@ on: - main pull_request: -env: - RUSTFLAGS: -Dwarnings - jobs: lint: name: Lint runs-on: ubuntu-latest + env: + RUSTFLAGS: -Dwarnings steps: - uses: actions/checkout@v4 diff --git a/Cargo.lock b/Cargo.lock index f356133..51a9d5e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11,12 +11,42 @@ dependencies = [ "memchr", ] +[[package]] +name = "allocator-api2" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" + +[[package]] +name = "anes" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" + +[[package]] +name = "anstyle" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" + +[[package]] +name = "anyhow" +version = "1.0.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34ac096ce696dc2fcabef30516bb13c0a68a11d30131d3df6f04711467681b04" + [[package]] name = "autocfg" version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" +[[package]] +name = "base16ct" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c7f02d4ea65f2c1853089ffd8d2787bdbc63de2f0d29dedbcf8ccdfa0ccd4cf" + [[package]] name = "basic-toml" version = "0.1.9" @@ -26,18 +56,243 @@ dependencies = [ "serde", ] +[[package]] +name = "bumpalo" +version = "3.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "cast" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" + [[package]] name = "cfg-if" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "ciborium" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" +dependencies = [ + "ciborium-io", + "ciborium-ll", + "serde", +] + +[[package]] +name = "ciborium-io" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" + +[[package]] +name = "ciborium-ll" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" +dependencies = [ + "ciborium-io", + "half", +] + +[[package]] +name = "clap" +version = "4.5.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3135e7ec2ef7b10c6ed8950f0f792ed96ee093fa088608f1c76e569722700c84" +dependencies = [ + "clap_builder", +] + +[[package]] +name = "clap_builder" +version = "4.5.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30582fc632330df2bd26877bde0c1f4470d57c582bbc070376afcd04d8cb4838" +dependencies = [ + "anstyle", + "clap_lex", +] + +[[package]] +name = "clap_lex" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" + +[[package]] +name = "codspeed" +version = "2.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "450a0e9df9df1c154156f4344f99d8f6f6e69d0fc4de96ef6e2e68b2ec3bce97" +dependencies = [ + "colored", + "libc", + "serde_json", +] + +[[package]] +name = "codspeed-criterion-compat" +version = "2.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8eb1a6cb9c20e177fde58cdef97c1c7c9264eb1424fe45c4fccedc2fb078a569" +dependencies = [ + "codspeed", + "colored", + "criterion", +] + +[[package]] +name = "colored" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "117725a109d387c937a1533ce01b450cbde6b88abceea8473c4d7a85853cda3c" +dependencies = [ + "lazy_static", + "windows-sys 0.59.0", +] + +[[package]] +name = "criterion" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" +dependencies = [ + "anes", + "cast", + "ciborium", + "clap", + "criterion-plot", + "is-terminal", + "itertools", + "num-traits", + "once_cell", + "oorandom", + "plotters", + "rayon", + "regex", + "serde", + "serde_derive", + "serde_json", + "tinytemplate", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" +dependencies = [ + "cast", + "itertools", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + +[[package]] +name = "crunchy" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" + +[[package]] +name = "either" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" + +[[package]] +name = "equivalent" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" + +[[package]] +name = "foldhash" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0d2fde1f7b3d48b8395d5f2de76c18a528bd6a9cdde438df747bfcba3e05d6f" + +[[package]] +name = "getrandom" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "half" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" +dependencies = [ + "cfg-if", + "crunchy", +] + +[[package]] +name = "hashbrown" +version = "0.15.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash", +] + [[package]] name = "heck" version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "hermit-abi" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fbf6a919d6cf397374f7dfeeea91d974c7c0a7221d0d0f4f20d859d329e53fcc" + [[package]] name = "humansize" version = "2.1.3" @@ -47,18 +302,70 @@ dependencies = [ "libm", ] +[[package]] +name = "indexmap" +version = "2.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62f822373a4fe84d4bb149bf54e584a7f4abec90e072ed49cda0edea5b95471f" +dependencies = [ + "equivalent", + "hashbrown", +] + [[package]] name = "indoc" version = "2.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5" +[[package]] +name = "is-terminal" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "261f68e344040fbd0edea105bef17c66edf46f984ddb1115b775ce31be948f4b" +dependencies = [ + "hermit-abi", + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674" +[[package]] +name = "js-sys" +version = "0.3.76" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6717b6b5b077764fb5966237269cb3c64edddde4b14ce42647430a78ced9e7b7" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "leb128" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "884e2677b40cc8c339eaefcb701c32ef1fd2493d71118dc0ca4b6a736c93bd67" + [[package]] name = "libc" version = "0.2.169" @@ -71,6 +378,12 @@ version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8355be11b20d696c8f18f6cc018c4e372165b1fa8126cef092399c9951984ffa" +[[package]] +name 
= "log" +version = "0.4.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" + [[package]] name = "memchr" version = "2.7.4" @@ -118,24 +431,76 @@ dependencies = [ "minimal-lexical", ] +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + [[package]] name = "once_cell" version = "1.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" +[[package]] +name = "oorandom" +version = "11.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b410bbe7e14ab526a0e86877eb47c6996a2bd7746f027ba551028c925390e4e9" + [[package]] name = "percent-encoding" version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" +[[package]] +name = "plotters" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" +dependencies = [ + "num-traits", + "plotters-backend", + "plotters-svg", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "plotters-backend" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a" + +[[package]] +name = "plotters-svg" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670" +dependencies = [ + "plotters-backend", +] + [[package]] name = "portable-atomic" version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "280dc24453071f1b63954171985a0b0d30058d287960968b9b2aca264c8d4ee6" +[[package]] +name = "ppv-lite86" +version = "0.2.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" +dependencies = [ + "zerocopy", +] + [[package]] name = "proc-macro2" version = "1.0.92" @@ -151,6 +516,7 @@ version = "0.23.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e484fd2c8b4cb67ab05a318f1fd6fa8f199fcc30819f08f07d200809dba26c15" dependencies = [ + "anyhow", "cfg-if", "indoc", "libc", @@ -226,6 +592,56 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom", +] + +[[package]] +name = "rayon" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + [[package]] name = "regex" version = "1.11.1" @@ -308,6 +724,15 @@ version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + [[package]] name = "serde" version = "1.0.217" @@ -340,6 +765,12 @@ dependencies = [ "serde", ] +[[package]] +name = "smallvec" +version = "2.0.0-alpha.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e22442b16a0c1bfae679ffed8ec5e160ae2aa8495cea891f0d2ff7b84fe5c4c" + [[package]] name = "syn" version = "2.0.95" @@ -361,12 +792,50 @@ checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" name = "test_results_parser" version = "0.5.1" dependencies = [ + "anyhow", + "base16ct", + "codspeed-criterion-compat", + "indexmap", "pyo3", "quick-xml", + "rand", "regex", "rinja", "serde", "serde_json", + "smallvec", + "thiserror", + "watto", +] + +[[package]] +name = "thiserror" +version = "2.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f072643fd0190df67a8bab670c20ef5d8737177d6ac6b2e9a236cb096206b2cc" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b50fa271071aae2e6ee85f842e2e28ba8cd2c5fb67f11fcb1fd70b276f9e7d4" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tinytemplate" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" +dependencies = [ + "serde", + "serde_json", ] [[package]] @@ -386,3 +855,205 @@ name = "unindent" version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c7de7d73e1754487cb58364ee906a499937a0dfabd86bcb980fa99ec8c8fa2ce" + +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + +[[package]] +name = "wasm-bindgen" +version = "0.2.99" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a474f6281d1d70c17ae7aa6a613c87fce69a127e2624002df63dcb39d6cf6396" +dependencies = [ + "cfg-if", + "once_cell", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.99" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f89bb38646b4f81674e8f5c3fb81b562be1fd936d84320f3264486418519c79" +dependencies = [ + "bumpalo", + "log", + "proc-macro2", + "quote", + "syn", + 
"wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.99" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2cc6181fd9a7492eef6fef1f33961e3695e4579b9872a6f7c83aee556666d4fe" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.99" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30d7a95b763d3c45903ed6c81f156801839e5ee968bb07e534c44df0fcd330c2" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.99" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "943aab3fdaaa029a6e0271b35ea10b72b943135afe9bffca82384098ad0e06a6" + +[[package]] +name = "watto" +version = "0.2.0" +source = "git+https://github.com/getsentry/watto#5ce326705ce5452449651b5b3e2070aedd30850a" +dependencies = [ + "hashbrown", + "leb128", + "thiserror", +] + +[[package]] +name = "web-sys" +version = "0.3.76" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04dd7223427d52553d3702c004d3b2fe07c148165faa56313cb00211e31c12bc" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "winapi-util" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" +dependencies = [ + "windows-sys 0.59.0", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" 
+version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "zerocopy" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" +dependencies = [ + "byteorder", + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] diff --git a/Cargo.toml b/Cargo.toml index 437f351..b83f359 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,9 +8,30 @@ name = "test_results_parser" crate-type = ["cdylib", "rlib"] [dependencies] -pyo3 = { version = "0.23.3", features = ["abi3-py312"] } +anyhow = "1.0.94" +base16ct = { version = "0.2.0", features = ["std"] } +indexmap = "2.6.0" +pyo3 = { version = "0.23.3", features = ["abi3-py312", "anyhow"] } quick-xml = "0.37.1" regex = "1.11.1" +rinja = "0.3.5" serde = { version = "1.0.215", features = ["derive"] } serde_json = "1.0.133" -rinja = "0.3.5" +smallvec = "2.0.0-alpha.7" +thiserror = "2.0.3" +watto = { git = "https://github.com/getsentry/watto", features = [ + "writer", + "strings", + "offset_set", +] } + +[dev-dependencies] +criterion = { version = "2.7.2", package = "codspeed-criterion-compat" } +rand = { version = "0.8.5", features = ["small_rng"] } + +[profile.release] +debug = 1 + +[[bench]] +name = "binary" +harness = false diff --git a/benches/binary.rs b/benches/binary.rs new file mode 100644 index 0000000..065ecff --- /dev/null +++ b/benches/binary.rs @@ -0,0 +1,209 @@ +use std::hint::black_box; + +use criterion::{criterion_group, criterion_main, Criterion}; +use rand::distributions::{Alphanumeric, DistString, Distribution, Uniform, WeightedIndex}; +use rand::rngs::SmallRng; +use rand::seq::SliceRandom as _; +use rand::{Rng, SeedableRng}; +use test_results_parser::binary::*; +use test_results_parser::{Outcome, Testrun}; + +criterion_group!(benches, binary); +criterion_main!(benches); + +const NUM_UPLOADS: usize = 10; +const NUM_TESTS_PER_UPLOAD: usize = 10_000; +const DAY: u32 = 24 * 60 * 60; + +fn binary(c: &mut Criterion) { + let rng = &mut SmallRng::seed_from_u64(0); + + let mut uploads = create_random_testcases(rng, NUM_UPLOADS, NUM_TESTS_PER_UPLOAD); + randomize_test_data(rng, &mut uploads); + + let buf = write_tests(&uploads, 60, 0); + let buf_1 = write_tests(&uploads[..NUM_UPLOADS - 1], 60, 0); + randomize_test_data(rng, &mut uploads); + let buf_2 = write_tests(&uploads[1..], 60, 1 * DAY); + + c.benchmark_group("binary") + .throughput(criterion::Throughput::Elements(NUM_TESTS_PER_UPLOAD as u64)) + .sample_size(10) // because with the configured `NUM_TESTS`, each iteration would run >100ms + .bench_function("create_and_serialize", |b| { + b.iter(|| { + write_tests(&uploads, 60, 0); + }) + }) + .bench_function("read_aggregation", |b| { + b.iter(|| { + let parsed = 
+                let parsed = TestAnalytics::parse(&buf, 0).unwrap();
+                for test in parsed.tests(0..60, None).unwrap() {
+                    let test = test.unwrap();
+                    let _name = black_box(test.name().unwrap());
+                    let _aggregates = black_box(test.aggregates());
+                }
+            })
+        })
+        .bench_function("update_same", |b| {
+            b.iter(|| {
+                let parsed = TestAnalytics::parse(&buf, 1).unwrap();
+                let mut writer = TestAnalyticsWriter::from_existing_format(&parsed).unwrap();
+                let mut flags = vec![];
+                for upload in &uploads {
+                    flags.clear();
+                    flags.extend(upload.flags.iter().map(String::as_str));
+                    let mut session = writer.start_session(1, CommitHash::default(), &flags);
+                    for test in &upload.tests {
+                        session.insert(test);
+                    }
+                }
+
+                let mut buf = vec![];
+                writer.serialize(&mut buf).unwrap();
+                buf
+            })
+        })
+        .bench_function("update_different", |b| {
+            b.iter(|| {
+                let parsed = TestAnalytics::parse(&buf_1, 1 * DAY).unwrap();
+                let mut writer = TestAnalyticsWriter::from_existing_format(&parsed).unwrap();
+                let mut flags = vec![];
+                for upload in uploads.iter().skip(1) {
+                    flags.clear();
+                    flags.extend(upload.flags.iter().map(String::as_str));
+                    let mut session = writer.start_session(1, CommitHash::default(), &flags);
+                    for test in &upload.tests {
+                        session.insert(test);
+                    }
+                }
+
+                let mut buf = vec![];
+                writer.serialize(&mut buf).unwrap();
+                buf
+            })
+        })
+        .bench_function("merge", |b| {
+            b.iter(|| {
+                let parsed_1 = TestAnalytics::parse(&buf_1, 1 * DAY).unwrap();
+                let parsed_2 = TestAnalytics::parse(&buf_2, 1 * DAY).unwrap();
+                let writer = TestAnalyticsWriter::merge(&parsed_1, &parsed_2).unwrap();
+
+                let mut buf = vec![];
+                writer.serialize(&mut buf).unwrap();
+                buf
+            })
+        })
+        .bench_function("merge_rewrite", |b| {
+            b.iter(|| {
+                let parsed_1 = TestAnalytics::parse(&buf_1, 1 * DAY).unwrap();
+                let parsed_2 = TestAnalytics::parse(&buf_2, 1 * DAY).unwrap();
+                let mut writer = TestAnalyticsWriter::merge(&parsed_1, &parsed_2).unwrap();
+
+                writer.rewrite(60, 1 * DAY, Some(0)).unwrap();
+
+                let mut buf = vec![];
+                writer.serialize(&mut buf).unwrap();
+                buf
+            })
+        });
+}
+
+fn write_tests(uploads: &[Upload], num_days: usize, timestamp: u32) -> Vec<u8> {
+    let mut writer = TestAnalyticsWriter::new(num_days);
+    let mut flags = vec![];
+    for upload in uploads {
+        flags.clear();
+        flags.extend(upload.flags.iter().map(String::as_str));
+        let mut session = writer.start_session(timestamp, CommitHash::default(), &flags);
+        for test in &upload.tests {
+            session.insert(test);
+        }
+    }
+
+    let mut buf = vec![];
+    writer.serialize(&mut buf).unwrap();
+    buf
+}
+
+struct Upload {
+    flags: Vec<String>,
+    tests: Vec<Testrun>,
+}
+
+/// Generates a random set of `num_flags` flags.
+fn create_random_flags(rng: &mut impl Rng, num_flags: usize) -> Vec<String> {
+    let flag_lens = Uniform::from(5usize..10);
+    (0..num_flags)
+        .map(|_| {
+            let flag_len = flag_lens.sample(rng);
+            Alphanumeric.sample_string(rng, flag_len)
+        })
+        .collect()
+}
+
+/// Samples random sets of fewer than `max_flags_in_set` flags.
+fn sample_flag_sets<'a>(
+    rng: &'a mut impl Rng,
+    flags: &'a [String],
+    max_flags_in_set: usize,
+) -> impl Iterator<Item = Vec<String>> + 'a {
+    let num_flags = Uniform::from(0..max_flags_in_set);
+    std::iter::from_fn(move || {
+        let num_flags = num_flags.sample(rng);
+        let flags: Vec<_> = flags.choose_multiple(rng, num_flags).cloned().collect();
+        Some(flags)
+    })
+}
+
+fn create_random_testcases(
+    rng: &mut impl Rng,
+    num_uploads: usize,
+    num_tests_per_upload: usize,
+) -> Vec<Upload> {
+    let flags = create_random_flags(rng, 5);
+    let flag_sets: Vec<_> = sample_flag_sets(rng, &flags, 3)
+        .take(num_uploads / 3)
+        .collect();
+    let name_lens = Uniform::from(5usize..50);
+
+    (0..num_uploads)
+        .map(|_| {
+            let flags = flag_sets.choose(rng).cloned().unwrap_or_default();
+            let tests = (0..num_tests_per_upload)
+                .map(|_| {
+                    let name_len = name_lens.sample(rng);
+                    let name = Alphanumeric.sample_string(rng, name_len);
+
+                    Testrun {
+                        name,
+                        classname: "".into(),
+                        duration: Some(1.0),
+                        outcome: Outcome::Pass,
+                        testsuite: "".into(),
+                        failure_message: None,
+                        filename: None,
+                        build_url: None,
+                        computed_name: None,
+                    }
+                })
+                .collect();
+            Upload { flags, tests }
+        })
+        .collect()
+}
+
+fn randomize_test_data(rng: &mut impl Rng, uploads: &mut [Upload]) {
+    let durations = Uniform::from(0f64..10f64);
+    let outcomes = WeightedIndex::new([1000, 10, 20]).unwrap();
+
+    for upload in uploads {
+        for test in &mut upload.tests {
+            test.duration = Some(durations.sample(rng));
+            test.outcome = match outcomes.sample(rng) {
+                0 => Outcome::Pass,
+                1 => Outcome::Skip,
+                _ => Outcome::Failure,
+            };
+        }
+    }
+}
diff --git a/src/binary/bindings.rs b/src/binary/bindings.rs
new file mode 100644
index 0000000..fc7fe53
--- /dev/null
+++ b/src/binary/bindings.rs
@@ -0,0 +1,161 @@
+use std::mem::transmute;
+
+use anyhow::Context;
+use pyo3::prelude::*;
+
+use crate::Testrun;
+
+use super::{TestAnalytics, TestAnalyticsWriter};
+
+#[pyclass]
+pub struct BinaryFormatWriter {
+    writer: Option<TestAnalyticsWriter>,
+}
+
+#[pymethods]
+impl BinaryFormatWriter {
+    #[new]
+    #[allow(clippy::new_without_default)]
+    pub fn new() -> Self {
+        Self {
+            writer: Some(TestAnalyticsWriter::new(60)),
+        }
+    }
+
+    #[staticmethod]
+    pub fn open(buffer: &[u8]) -> anyhow::Result<Self> {
+        let format = TestAnalytics::parse(buffer, 0)?;
+        let writer = TestAnalyticsWriter::from_existing_format(&format)?;
+        Ok(Self {
+            writer: Some(writer),
+        })
+    }
+
+    pub fn add_testruns(
+        &mut self,
+        timestamp: u32,
+        commit_hash: &str,
+        flags: Vec<String>,
+        testruns: Vec<Testrun>,
+    ) -> anyhow::Result<()> {
+        let commit_hash_base16 = if commit_hash.len() > 40 {
+            commit_hash
+                .get(..40)
+                .context("expected a hex-encoded commit hash")?
+        } else {
+            commit_hash
+        };
+        let mut commit_hash = super::CommitHash::default();
+        base16ct::mixed::decode(commit_hash_base16, &mut commit_hash.0)?;
+
+        let writer = self
+            .writer
+            .as_mut()
+            .context("writer was already serialized")?;
+
+        let flags: Vec<_> = flags.iter().map(|s| s.as_str()).collect();
+        let mut session = writer.start_session(timestamp, commit_hash, &flags);
+        for test in testruns {
+            session.insert(&test);
+        }
+        Ok(())
+    }
+
+    pub fn serialize(&mut self) -> anyhow::Result<Vec<u8>> {
+        let writer = self
+            .writer
+            .take()
+            .context("writer was already serialized")?;
+        let mut buffer = vec![];
+        writer.serialize(&mut buffer)?;
+        Ok(buffer)
+    }
+}
+
+#[pyclass]
+pub struct AggregationReader {
+    _buffer: Vec<u8>,
+    format: TestAnalytics<'static>,
+}
+
+#[pyclass(get_all)]
+pub struct TestAggregate {
+    pub name: String,
+    // TODO:
+    pub test_id: String,
+
+    pub testsuite: Option<String>,
+    pub flags: Vec<String>,
+
+    pub failure_rate: f32,
+    pub flake_rate: f32,
+
+    // TODO:
+    pub updated_at: u32,
+    pub avg_duration: f64,
+
+    pub total_fail_count: u32,
+    pub total_flaky_fail_count: u32,
+    pub total_pass_count: u32,
+    pub total_skip_count: u32,
+
+    pub commits_where_fail: usize,
+
+    // TODO:
+    pub last_duration: f32,
+}
+
+#[pymethods]
+impl AggregationReader {
+    #[new]
+    pub fn new(buffer: Vec<u8>, timestamp: u32) -> anyhow::Result<Self> {
+        let format = TestAnalytics::parse(&buffer, timestamp)?;
+        // SAFETY: the lifetime of `TestAnalytics` depends on `buffer`,
+        // which we do not mutate, and which outlives the parsed format.
+        let format =
+            unsafe { transmute::<TestAnalytics<'_>, TestAnalytics<'static>>(format) };
+
+        Ok(Self {
+            _buffer: buffer,
+            format,
+        })
+    }
+
+    #[pyo3(signature = (interval_start, interval_end, flags=None))]
+    pub fn get_test_aggregates(
+        &self,
+        interval_start: usize,
+        interval_end: usize,
+        flags: Option<Vec<String>>,
+    ) -> anyhow::Result<Vec<TestAggregate>> {
+        let flags: Option<Vec<&str>> = flags
+            .as_ref()
+            .map(|flags| flags.iter().map(|flag| flag.as_str()).collect());
+        let desired_range = interval_start..interval_end;
+
+        let tests = self.format.tests(desired_range, flags.as_deref())?;
+        let mut collected_tests = vec![];
+
+        for test in tests {
+            let test = test?;
+
+            collected_tests.push(TestAggregate {
+                name: test.name()?.into(),
+                test_id: "TODO".into(),
+                testsuite: Some(test.testsuite()?.into()),
+                flags: test.flags()?.into_iter().map(|s| s.into()).collect(),
+                failure_rate: test.aggregates().failure_rate,
+                flake_rate: test.aggregates().flake_rate,
+                updated_at: 0, // TODO
+                avg_duration: test.aggregates().avg_duration,
+                total_fail_count: test.aggregates().total_fail_count,
+                total_flaky_fail_count: test.aggregates().total_flaky_fail_count,
+                total_pass_count: test.aggregates().total_pass_count,
+                total_skip_count: test.aggregates().total_skip_count,
+                commits_where_fail: test.aggregates().failing_commits,
+                last_duration: 0., // TODO
+            });
+        }
+
+        Ok(collected_tests)
+    }
+}
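Taken together, the two pyclasses above form a write-then-read pipeline. A minimal sketch of a caller driving them end to end (illustrative only, not part of the change; assumes this module's exports are in scope and `testruns` stands for an already-parsed batch):

    fn roundtrip(testruns: Vec<Testrun>) -> anyhow::Result<Vec<TestAggregate>> {
        let mut writer = BinaryFormatWriter::new();
        // a 40-char hex commit hash, decoded into a 20-byte CommitHash internally
        writer.add_testruns(
            0,
            "0123456789abcdef0123456789abcdef01234567",
            vec!["flag-a".into()],
            testruns,
        )?;
        let buffer = writer.serialize()?; // takes the inner writer; a second call errors
        let reader = AggregationReader::new(buffer, 0)?;
        reader.get_test_aggregates(0, 60, None) // full 60-day window, no flag filter
    }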
diff --git a/src/binary/commithashes_set.rs b/src/binary/commithashes_set.rs
new file mode 100644
index 0000000..eb46cda
--- /dev/null
+++ b/src/binary/commithashes_set.rs
@@ -0,0 +1,72 @@
+use watto::OffsetSet;
+
+use super::*;
+
+#[derive(Debug)]
+pub struct CommitHashesSet {
+    inner: OffsetSet<raw::CommitHash>,
+    temp_hashes: Vec<raw::CommitHash>,
+}
+
+impl CommitHashesSet {
+    pub fn new() -> Self {
+        let mut inner = OffsetSet::<raw::CommitHash>::default();
+        // NOTE: this reserves offset `0` for the empty set
+        inner.insert(&[]);
+
+        Self {
+            inner,
+            temp_hashes: Default::default(),
+        }
+    }
+
+    pub fn from_bytes(bytes: &[u8]) -> Result<Self, TestAnalyticsError> {
+        let inner = OffsetSet::<raw::CommitHash>::from_bytes(bytes)
+            .map_err(|_| TestAnalyticsErrorKind::InvalidCommitSetReference)?;
+        Ok(Self {
+            inner,
+            temp_hashes: Default::default(),
+        })
+    }
+
+    pub fn into_bytes(self) -> Vec<u8> {
+        self.inner.into_bytes()
+    }
+
+    pub fn read_raw(bytes: &[u8], offset: u32) -> Result<&[raw::CommitHash], TestAnalyticsError> {
+        if offset == 0 {
+            return Ok(&[]);
+        }
+        Ok(OffsetSet::<raw::CommitHash>::read(bytes, offset as usize)
+            .map_err(|_| TestAnalyticsErrorKind::InvalidCommitSetReference)?)
+    }
+
+    pub fn read(&self, offset: u32) -> &[raw::CommitHash] {
+        Self::read_raw(self.inner.as_bytes(), offset).unwrap()
+    }
+
+    /// Appends the `commit_hashes` to the existing set referenced by `existing_offset`.
+    ///
+    /// This returns a new offset in case any new commit hash was added, or returns
+    /// the `existing_offset` unmodified in case the existing set already includes
+    /// all the `commit_hashes`.
+    pub fn append_intersection(
+        &mut self,
+        existing_offset: u32,
+        commit_hashes: &[raw::CommitHash],
+    ) -> u32 {
+        let existing_hashes =
+            OffsetSet::<raw::CommitHash>::read(self.inner.as_bytes(), existing_offset as usize)
+                .unwrap();
+
+        self.temp_hashes.extend_from_slice(existing_hashes);
+        self.temp_hashes.extend_from_slice(commit_hashes);
+        self.temp_hashes.sort();
+        self.temp_hashes.dedup();
+
+        let offset = self.inner.insert(&self.temp_hashes);
+        self.temp_hashes.clear();
+
+        offset as u32
+    }
+}
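The offset conventions of `CommitHashesSet` in miniature (a sketch; it relies on `OffsetSet` deduplicating sets by content, which `append_intersection` above depends on — concrete offset values are not assumed):

    let mut set = CommitHashesSet::new();
    let (a, b) = (CommitHash([0xaa; 20]), CommitHash([0xbb; 20]));
    assert_eq!(set.read(0), &[]); // offset 0 is the reserved empty set
    let ab = set.append_intersection(0, &[a, b]); // stores {a, b}, sorted and deduped
    assert_eq!(set.read(ab), &[a, b]);
    assert_eq!(set.append_intersection(ab, &[a]), ab); // subset: existing offset comes back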
diff --git a/src/binary/error.rs b/src/binary/error.rs
new file mode 100644
index 0000000..6e6f744
--- /dev/null
+++ b/src/binary/error.rs
@@ -0,0 +1,61 @@
+use thiserror::Error;
+
+#[cfg(doc)]
+use super::*;
+
+/// The kind of a [`TestAnalyticsError`].
+#[derive(Debug, Clone, Copy, Error)]
+#[non_exhaustive]
+pub enum TestAnalyticsErrorKind {
+    /// The file header could not be read.
+    #[error("could not read header")]
+    InvalidHeader,
+    /// The cache file header does not contain the correct magic bytes.
+    #[error("invalid magic: {0}")]
+    InvalidMagic(u32),
+    /// The cache file header contains an invalid version.
+    #[error("wrong version: {0}")]
+    WrongVersion(u32),
+    /// One of the tables could not be read from the file.
+    #[error("could not read data tables")]
+    InvalidTables,
+    /// The header claimed an incorrect number of string bytes.
+    #[error("expected {expected} string bytes, found {found}")]
+    UnexpectedStringBytes {
+        /// Expected number of string bytes.
+        expected: usize,
+        /// Number of string bytes actually found in the cache file.
+        found: usize,
+    },
+    /// The string reference was invalid.
+    #[error("could not resolve string reference")]
+    InvalidStringReference,
+    /// The flag set reference was invalid.
+    #[error("could not resolve flag set reference")]
+    InvalidFlagSetReference,
+    /// The commit set reference was invalid.
+    #[error("could not resolve commit set reference")]
+    InvalidCommitSetReference,
+}
+
+/// An error encountered during [`TestAnalytics`] creation or parsing.
+#[derive(Debug, Error)]
+#[error("{kind}")]
+pub struct TestAnalyticsError {
+    pub(crate) kind: TestAnalyticsErrorKind,
+    #[source]
+    pub(crate) source: Option<Box<dyn std::error::Error + Send + Sync + 'static>>,
+}
+
+impl TestAnalyticsError {
+    /// Returns the corresponding [`TestAnalyticsErrorKind`] for this error.
+    pub fn kind(&self) -> TestAnalyticsErrorKind {
+        self.kind
+    }
+}
+
+impl From<TestAnalyticsErrorKind> for TestAnalyticsError {
+    fn from(kind: TestAnalyticsErrorKind) -> Self {
+        Self { kind, source: None }
+    }
+}
diff --git a/src/binary/flags_set.rs b/src/binary/flags_set.rs
new file mode 100644
index 0000000..a3a6e45
--- /dev/null
+++ b/src/binary/flags_set.rs
@@ -0,0 +1,117 @@
+use std::borrow::Cow;
+use std::collections::HashMap;
+
+use smallvec::SmallVec;
+use watto::StringTable;
+
+use super::{TestAnalyticsError, TestAnalyticsErrorKind};
+
+type FlagsMap = HashMap<SmallVec<u32, 4>, u32>;
+
+#[derive(Debug, Default, Clone)]
+pub struct FlagsSet<'table> {
+    pub(crate) map: FlagsMap,
+    pub(crate) table: Cow<'table, [u32]>,
+}
+
+impl<'table> FlagsSet<'table> {
+    /// Inserts the given `flags`, using the given `string_table` as string buffer.
+    pub fn insert(&mut self, string_table: &mut StringTable, flags: &[&str]) -> u32 {
+        let mut flags: SmallVec<&str, 4> = flags.into();
+        flags.sort();
+        flags.dedup();
+
+        let flags = flags
+            .iter()
+            .map(|flag| string_table.insert(flag) as u32)
+            .collect();
+
+        *self.map.entry(flags).or_insert_with_key(|flags| {
+            let offset = self.table.len();
+            let table = self.table.to_mut();
+            table.push(flags.len() as u32);
+            table.extend(flags);
+            offset as u32
+        })
+    }
+
+    /// Loads the flags `table`, validating that all flag names are valid `string_table` references.
+    pub fn load(string_bytes: &[u8], table: &'table [u32]) -> Result<Self, TestAnalyticsError> {
+        let mut map = FlagsMap::default();
+        let mut offset = 0;
+        let mut rest = table;
+
+        while let Some((len, new_rest)) = rest.split_first() {
+            let (flags, new_rest) = new_rest
+                .split_at_checked(*len as usize)
+                .ok_or(TestAnalyticsErrorKind::InvalidTables)?;
+
+            // validate all the flags
+            for flag in flags {
+                StringTable::read(string_bytes, *flag as usize)
+                    .map_err(|_| TestAnalyticsErrorKind::InvalidStringReference)?;
+            }
+
+            map.insert(flags.into(), offset);
+
+            offset += 1 + len;
+            rest = new_rest;
+        }
+
+        Ok(Self {
+            map,
+            table: table.into(),
+        })
+    }
+
+    pub fn iter<'slf, 'strings>(
+        &'slf self,
+        string_bytes: &'strings [u8],
+    ) -> impl ExactSizeIterator<Item = Result<(u32, SmallVec<&'strings str, 4>), TestAnalyticsError>>
+           + use<'slf, 'strings> {
+        self.map.iter().map(|(flags_offsets, offset)| {
+            let mut flags: SmallVec<&'strings str, 4> =
+                SmallVec::with_capacity(flags_offsets.len());
+            for flag in flags_offsets {
+                let flag = StringTable::read(string_bytes, *flag as usize)
+                    .map_err(|_| TestAnalyticsErrorKind::InvalidStringReference)?;
+                flags.push(flag);
+            }
+
+            Ok((*offset, flags))
+        })
+    }
+
+    pub fn resolve<'strings>(
+        &self,
+        string_bytes: &'strings [u8],
+        offset: u32,
+    ) -> Result<SmallVec<&'strings str, 4>, TestAnalyticsError> {
+        let len = self
+            .table
+            .get(offset as usize)
+            .ok_or(TestAnalyticsErrorKind::InvalidFlagSetReference)?;
+        let len = *len as usize;
+        let range_start = offset as usize + 1;
+        let range = range_start..range_start + len;
+        let flags_raw = self
+            .table
+            .get(range)
+            .ok_or(TestAnalyticsErrorKind::InvalidFlagSetReference)?;
+
+        let mut flags = SmallVec::with_capacity(len);
+        for flag in flags_raw {
+            let string = StringTable::read(string_bytes, *flag as usize)
+                .map_err(|_| TestAnalyticsErrorKind::InvalidStringReference)?;
+            flags.push(string);
+        }
+        Ok(flags)
+    }
+
+    pub fn to_owned(&self) -> FlagsSet<'static> {
+        FlagsSet {
+            map: self.map.clone(),
+            table: self.table.clone().into_owned().into(),
+        }
+    }
+}
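For reference, the flag-set table layout that `insert` and `resolve` above agree on, spelled out on a two-set example (the string-table offsets are invented for illustration):

    // string table: "flag-a" at offset 1, "flag-b" at offset 8     (assumed offsets)
    // flags table:  [2, 1, 8,  1, 8]
    //                ^ set at offset 0: len 2, then the two name offsets
    //                          ^ set at offset 3: len 1
    //
    // resolve(0) -> ["flag-a", "flag-b"], resolve(3) -> ["flag-b"]; inserting an
    // already-known set short-circuits through `map` and returns its existing
    // offset instead of growing `table`.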
diff --git a/src/binary/format.rs b/src/binary/format.rs
new file mode 100644
index 0000000..80e1379
--- /dev/null
+++ b/src/binary/format.rs
@@ -0,0 +1,264 @@
+use std::collections::HashSet;
+use std::fmt;
+use std::ops::Range;
+
+use commithashes_set::CommitHashesSet;
+use flags_set::FlagsSet;
+use smallvec::SmallVec;
+use timestamps::{adjust_selection_range, offset_from_today};
+use watto::Pod;
+
+use super::*;
+
+/// The current format version.
+pub(crate) const TA_VERSION: u32 = 1;
+
+/// The serialized [`TestAnalytics`] binary format.
+///
+/// This can be parsed from a binary buffer via [`TestAnalytics::parse`].
+#[derive(Clone)]
+pub struct TestAnalytics<'data> {
+    pub(crate) timestamp: u32,
+
+    pub(crate) header: &'data raw::Header,
+
+    pub(crate) flags_set: FlagsSet<'data>,
+    pub(crate) commithashes_bytes: &'data [u8],
+    pub(crate) string_bytes: &'data [u8],
+
+    pub(crate) tests: &'data [raw::Test],
+    pub(crate) testdata: &'data [raw::TestData],
+}
+
+impl<'data> TestAnalytics<'data> {
+    /// Parses the given buffer into [`TestAnalytics`].
+    pub fn parse(buf: &'data [u8], timestamp: u32) -> Result<Self, TestAnalyticsError> {
+        let (header, rest) =
+            raw::Header::ref_from_prefix(buf).ok_or(TestAnalyticsErrorKind::InvalidHeader)?;
+
+        if header.magic != raw::TA_MAGIC {
+            return Err(TestAnalyticsErrorKind::InvalidMagic(header.magic).into());
+        }
+
+        if header.version != TA_VERSION {
+            return Err(TestAnalyticsErrorKind::WrongVersion(header.version).into());
+        }
+
+        let (tests, rest) = raw::Test::slice_from_prefix(rest, header.num_tests as usize)
+            .ok_or(TestAnalyticsErrorKind::InvalidTables)?;
+
+        let expected_data = header.num_tests as usize * header.num_days as usize;
+
+        let (testdata, rest) = raw::TestData::slice_from_prefix(rest, expected_data)
+            .ok_or(TestAnalyticsErrorKind::InvalidTables)?;
+
+        let (flags_set, rest) = u32::slice_from_prefix(rest, header.flags_set_len as usize)
+            .ok_or(TestAnalyticsErrorKind::InvalidTables)?;
+
+        let (commithashes_bytes, rest) =
+            u8::slice_from_prefix(rest, header.commithashes_bytes as usize)
+                .ok_or(TestAnalyticsErrorKind::InvalidTables)?;
+
+        let string_bytes = rest.get(..header.string_bytes as usize).ok_or(
+            TestAnalyticsErrorKind::UnexpectedStringBytes {
+                expected: header.string_bytes as usize,
+                found: rest.len(),
+            },
+        )?;
+
+        let flags_set = FlagsSet::load(string_bytes, flags_set)?;
+
+        Ok(Self {
+            timestamp: timestamp.max(header.timestamp),
+
+            header,
+
+            flags_set,
+            commithashes_bytes,
+            string_bytes,
+
+            tests,
+            testdata,
+        })
+    }
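+
+    // The buffer layout consumed by `parse` above, in order (sketch; the struct
+    // sizes are the ones asserted in raw.rs):
+    //   raw::Header                            32 bytes (magic, version, timestamp, counts)
+    //   [raw::Test; num_tests]                 16 bytes each
+    //   [raw::TestData; num_tests * num_days]  24 bytes each, one row of day buckets per test
+    //   [u32; flags_set_len]                   length-prefixed flag sets
+    //   [u8; commithashes_bytes]               serialized commit-hash sets
+    //   [u8; string_bytes]                     string table (test names, testsuites, flags)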
+
+    /// Iterates over the [`Test`]s included in the [`TestAnalytics`] summary.
+    pub fn tests(
+        &self,
+        desired_range: Range<usize>,
+        flags: Option<&[&str]>,
+    ) -> Result<
+        impl Iterator<Item = Result<Test<'data, '_>, TestAnalyticsError>> + '_,
+        TestAnalyticsError,
+    > {
+        let matching_flags_sets = if let Some(flags) = flags {
+            let flag_sets = self.flags_set.iter(self.string_bytes);
+
+            let mut matching_flags_sets: SmallVec<u32, 4> = Default::default();
+            for res in flag_sets {
+                let (offset, flag_set) = res?;
+                if flags.iter().any(|flag| flag_set.contains(flag)) {
+                    matching_flags_sets.push(offset);
+                }
+            }
+            matching_flags_sets.sort();
+
+            Some(matching_flags_sets)
+        } else {
+            None
+        };
+        let mut failing_commits = HashSet::new();
+
+        let num_days = self.header.num_days as usize;
+        let tests = self.tests.iter().enumerate().filter_map(move |(i, test)| {
+            if let Some(flags_sets) = &matching_flags_sets {
+                if !flags_sets.contains(&test.flag_set_offset) {
+                    return None;
+                }
+            }
+
+            let start_idx = i * num_days;
+            let latest_test_timestamp = self.testdata[start_idx].last_timestamp;
+
+            let today_offset = offset_from_today(latest_test_timestamp, self.timestamp);
+            let data_range = start_idx..start_idx + test.valid_data as usize;
+            let adjusted_range =
+                adjust_selection_range(data_range, desired_range.clone(), today_offset);
+
+            if adjusted_range.is_empty() {
+                return None;
+            }
+
+            let aggregates = Aggregates::from_data(
+                self.commithashes_bytes,
+                &mut failing_commits,
+                &self.testdata[adjusted_range],
+            );
+
+            Some(aggregates.map(|aggregates| Test {
+                container: self,
+                data: test,
+                aggregates,
+            }))
+        });
+        Ok(tests)
+    }
+}
+
+impl fmt::Debug for TestAnalytics<'_> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("TestAnalytics")
+            .field("version", &self.header.version)
+            .field("tests", &self.header.num_tests)
+            .field("days", &self.header.num_days)
+            .field("string_bytes", &self.header.string_bytes)
+            .finish()
+    }
+}
+
+/// This represents a specific test for which test analytics data is gathered.
+#[derive(Debug, Clone)]
+pub struct Test<'data, 'parsed> {
+    container: &'parsed TestAnalytics<'data>,
+    data: &'data raw::Test,
+
+    aggregates: Aggregates,
+}
+
+impl<'data> Test<'data, '_> {
+    /// Returns the testsuite of the test.
+    pub fn testsuite(&self) -> Result<&'data str, TestAnalyticsError> {
+        watto::StringTable::read(
+            self.container.string_bytes,
+            self.data.testsuite_offset as usize,
+        )
+        .map_err(|_| TestAnalyticsErrorKind::InvalidStringReference.into())
+    }
+
+    /// Returns the name of the test.
+    pub fn name(&self) -> Result<&'data str, TestAnalyticsError> {
+        watto::StringTable::read(self.container.string_bytes, self.data.name_offset as usize)
+            .map_err(|_| TestAnalyticsErrorKind::InvalidStringReference.into())
+    }
+
+    /// Returns the flags this test was run with.
+    pub fn flags(&self) -> Result<SmallVec<&'data str, 4>, TestAnalyticsError> {
+        self.container
+            .flags_set
+            .resolve(self.container.string_bytes, self.data.flag_set_offset)
+    }
+
+    /// Returns the calculated aggregates.
+    pub fn aggregates(&self) -> &Aggregates {
+        &self.aggregates
+    }
+}
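+
+// Worked example for `Aggregates::from_data` below (numbers invented): if the
+// selected buckets sum to 7 passes, 3 fails (1 of them flaky) and 25.0s total
+// duration, then:
+//   total_run_count = 7 + 3 = 10          (skips do not count as runs)
+//   failure_rate    = 3.0 / 10.0 = 0.3
+//   flake_rate      = 1.0 / 10.0 = 0.1
+//   avg_duration    = 25.0 / 10.0 = 2.5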
+
+/// Contains test run data aggregated over a given time period.
+#[derive(Debug, Clone, PartialEq)]
+pub struct Aggregates {
+    pub total_pass_count: u32,
+    pub total_fail_count: u32,
+    pub total_skip_count: u32,
+    pub total_flaky_fail_count: u32,
+
+    pub failure_rate: f32,
+    pub flake_rate: f32,
+
+    pub avg_duration: f64,
+
+    pub failing_commits: usize,
+}
+
+impl Aggregates {
+    fn from_data(
+        commithashes_bytes: &[u8],
+        all_failing_commits: &mut HashSet<raw::CommitHash>,
+        data: &[raw::TestData],
+    ) -> Result<Self, TestAnalyticsError> {
+        let mut total_pass_count = 0;
+        let mut total_fail_count = 0;
+        let mut total_skip_count = 0;
+        let mut total_flaky_fail_count = 0;
+        let mut total_duration = 0.;
+
+        for testdata in data {
+            total_pass_count += testdata.total_pass_count as u32;
+            total_fail_count += testdata.total_fail_count as u32;
+            total_skip_count += testdata.total_skip_count as u32;
+            total_flaky_fail_count += testdata.total_flaky_fail_count as u32;
+            total_duration += testdata.total_duration as f64;
+
+            let failing_commits =
+                CommitHashesSet::read_raw(commithashes_bytes, testdata.failing_commits_set)?;
+            all_failing_commits.extend(failing_commits);
+        }
+
+        let failing_commits = all_failing_commits.len();
+        all_failing_commits.clear();
+
+        let total_run_count = total_pass_count + total_fail_count;
+        let (failure_rate, flake_rate, avg_duration) = if total_run_count > 0 {
+            (
+                total_fail_count as f32 / total_run_count as f32,
+                total_flaky_fail_count as f32 / total_run_count as f32,
+                total_duration / total_run_count as f64,
+            )
+        } else {
+            (0., 0., 0.)
+        };
+
+        Ok(Aggregates {
+            total_pass_count,
+            total_fail_count,
+            total_skip_count,
+            total_flaky_fail_count,
+
+            failure_rate,
+            flake_rate,
+
+            avg_duration,
+
+            failing_commits,
+        })
+    }
+}
diff --git a/src/binary/mod.rs b/src/binary/mod.rs
new file mode 100644
index 0000000..743efc2
--- /dev/null
+++ b/src/binary/mod.rs
@@ -0,0 +1,442 @@
+mod bindings;
+mod commithashes_set;
+mod error;
+mod flags_set;
+mod format;
+mod raw;
+mod timestamps;
+mod writer;
+
+pub use bindings::{AggregationReader, BinaryFormatWriter, TestAggregate};
+pub use error::{TestAnalyticsError, TestAnalyticsErrorKind};
+pub use format::{Test, TestAnalytics};
+pub use raw::CommitHash;
+pub use writer::TestAnalyticsWriter;
+
+#[cfg(test)]
+mod tests {
+    use raw::CommitHash;
+    use timestamps::DAY;
+
+    use crate::testrun::{Outcome, Testrun};
+
+    use super::*;
+
+    fn test() -> Testrun {
+        Testrun {
+            name: "abc".into(),
+            classname: "".into(),
+            duration: Some(1.0),
+            outcome: Outcome::Pass,
+            testsuite: "".into(),
+            failure_message: None,
+            filename: None,
+            build_url: None,
+            computed_name: None,
+        }
+    }
+
+    #[test]
+    fn test_empty() {
+        let writer = TestAnalyticsWriter::new(60);
+
+        let mut buf = vec![];
+        writer.serialize(&mut buf).unwrap();
+
+        let parsed = TestAnalytics::parse(&buf, 0).unwrap();
+        assert!(parsed.tests(0..60, None).unwrap().next().is_none());
+    }
+
+    #[test]
+    fn test_builder() {
+        let mut test = test();
+
+        let mut writer = TestAnalyticsWriter::new(2);
+        let mut session = writer.start_session(0, CommitHash::default(), &[]);
+
+        session.insert(&test);
+
+        test.outcome = Outcome::Failure;
+        test.duration = Some(2.0);
+        session.insert(&test);
+
+        test.name = "def".into();
+        test.outcome = Outcome::Skip;
+        test.duration = Some(0.0);
+        session.insert(&test);
+
+        let mut buf = vec![];
+        writer.serialize(&mut buf).unwrap();
+
+        let parsed = TestAnalytics::parse(&buf, 0).unwrap();
+        let mut tests = parsed.tests(0..60, None).unwrap();
+
+        let abc = tests.next().unwrap().unwrap();
+        assert_eq!(abc.name().unwrap(), "abc");
+        let aggregates = 
abc.aggregates(); + assert_eq!(aggregates.total_pass_count, 1); + assert_eq!(aggregates.total_fail_count, 1); + assert_eq!(aggregates.avg_duration, 1.5); + + let abc = tests.next().unwrap().unwrap(); + assert_eq!(abc.name().unwrap(), "def"); + let aggregates = abc.aggregates(); + assert_eq!(aggregates.total_skip_count, 1); + + assert!(tests.next().is_none()); + } + + #[test] + fn test_testsuites() { + let mut test = test(); + + let mut writer = TestAnalyticsWriter::new(2); + let mut session = writer.start_session(0, CommitHash::default(), &[]); + + session.insert(&test); + test.testsuite = "some testsuite".into(); + session.insert(&test); + + let mut buf = vec![]; + writer.serialize(&mut buf).unwrap(); + + let parsed = TestAnalytics::parse(&buf, 0).unwrap(); + let mut tests = parsed.tests(0..60, None).unwrap(); + + let abc = tests.next().unwrap().unwrap(); + assert_eq!(abc.testsuite().unwrap(), ""); + assert_eq!(abc.name().unwrap(), "abc"); + + let abc_with_testsuite = tests.next().unwrap().unwrap(); + assert_eq!(abc_with_testsuite.testsuite().unwrap(), "some testsuite"); + assert_eq!(abc_with_testsuite.name().unwrap(), "abc"); + + assert!(tests.next().is_none()); + } + + #[test] + fn test_time_shift() { + let test = test(); + + let mut writer = TestAnalyticsWriter::new(2); + let mut session = writer.start_session(0, CommitHash::default(), &[]); + + session.insert(&test); + + let mut buf = vec![]; + writer.serialize(&mut buf).unwrap(); + + // the test was written at timestamp `0`, and we parse at that same timestamp + // so we expect the data in the "today" bucket + let parsed = TestAnalytics::parse(&buf, 0).unwrap(); + let mut tests = parsed.tests(0..1, None).unwrap(); + + let abc = tests.next().unwrap().unwrap(); + assert_eq!(abc.name().unwrap(), "abc"); + let aggregates = abc.aggregates(); + assert_eq!(aggregates.total_pass_count, 1); + assert_eq!(aggregates.avg_duration, 1.0); + + assert!(tests.next().is_none()); + + // next, we re-parse one day ahead + let parsed = TestAnalytics::parse(&buf, DAY).unwrap(); + + // the test has no data for "today", so is not being yielded + let mut tests = parsed.tests(0..1, None).unwrap(); + assert!(tests.next().is_none()); + + // the data should be in the "yesterday" bucket + let mut tests = parsed.tests(1..2, None).unwrap(); + + let abc = tests.next().unwrap().unwrap(); + assert_eq!(abc.name().unwrap(), "abc"); + let aggregates = abc.aggregates(); + assert_eq!(aggregates.total_pass_count, 1); + assert_eq!(aggregates.avg_duration, 1.0); + + assert!(tests.next().is_none()); + } + + #[test] + fn test_append_data() { + let test = test(); + + let mut writer = TestAnalyticsWriter::new(2); + let mut session = writer.start_session(0, CommitHash::default(), &[]); + + session.insert(&test); + + let mut buf = vec![]; + writer.serialize(&mut buf).unwrap(); + + let parsed = TestAnalytics::parse(&buf, DAY).unwrap(); + let mut writer = TestAnalyticsWriter::from_existing_format(&parsed).unwrap(); + let mut session = writer.start_session(DAY, CommitHash::default(), &[]); + + session.insert(&test); + + let mut buf = vec![]; + writer.serialize(&mut buf).unwrap(); + + let parsed = TestAnalytics::parse(&buf, DAY).unwrap(); + + // we should have data in the "today" bucket + let mut tests = parsed.tests(0..1, None).unwrap(); + let abc = tests.next().unwrap().unwrap(); + assert_eq!(abc.name().unwrap(), "abc"); + let aggregates = abc.aggregates(); + assert_eq!(aggregates.total_pass_count, 1); + assert_eq!(aggregates.avg_duration, 1.0); + assert!(tests.next().is_none()); + + 
// as well as in the "yesterday" bucket + let mut tests = parsed.tests(1..2, None).unwrap(); + let abc = tests.next().unwrap().unwrap(); + assert_eq!(abc.name().unwrap(), "abc"); + let aggregates = abc.aggregates(); + assert_eq!(aggregates.total_pass_count, 1); + assert_eq!(aggregates.avg_duration, 1.0); + assert!(tests.next().is_none()); + } + + #[test] + fn test_merge() { + let test = test(); + + let mut writer = TestAnalyticsWriter::new(2); + let mut session = writer.start_session(0, CommitHash::default(), &[]); + session.insert(&test); + let mut buf_1 = vec![]; + writer.serialize(&mut buf_1).unwrap(); + + let mut writer = TestAnalyticsWriter::new(2); + let mut session = writer.start_session(DAY, CommitHash::default(), &[]); + session.insert(&test); + let mut buf_2 = vec![]; + writer.serialize(&mut buf_2).unwrap(); + + let parsed_1 = TestAnalytics::parse(&buf_1, DAY).unwrap(); + let parsed_2 = TestAnalytics::parse(&buf_2, DAY).unwrap(); + + let merged_12 = TestAnalyticsWriter::merge(&parsed_1, &parsed_2).unwrap(); + let merged_21 = TestAnalyticsWriter::merge(&parsed_2, &parsed_1).unwrap(); + + let mut buf_12 = vec![]; + merged_12.serialize(&mut buf_12).unwrap(); + let mut buf_21 = vec![]; + merged_21.serialize(&mut buf_21).unwrap(); + + assert_eq!(buf_12, buf_21); + + let parsed = TestAnalytics::parse(&buf_12, DAY).unwrap(); + + // we should have data in the "today" bucket + let mut tests = parsed.tests(0..1, None).unwrap(); + let abc = tests.next().unwrap().unwrap(); + assert_eq!(abc.name().unwrap(), "abc"); + let aggregates = abc.aggregates(); + assert_eq!(aggregates.total_pass_count, 1); + assert_eq!(aggregates.avg_duration, 1.0); + assert!(tests.next().is_none()); + + // as well as in the "yesterday" bucket + let mut tests = parsed.tests(1..2, None).unwrap(); + let abc = tests.next().unwrap().unwrap(); + assert_eq!(abc.name().unwrap(), "abc"); + let aggregates = abc.aggregates(); + assert_eq!(aggregates.total_pass_count, 1); + assert_eq!(aggregates.avg_duration, 1.0); + assert!(tests.next().is_none()); + } + + #[test] + fn test_garbage_collection() { + let test = test(); + + let mut writer = TestAnalyticsWriter::new(2); + let mut session = writer.start_session(0, CommitHash::default(), &[]); + + session.insert(&test); + + let mut buf = vec![]; + writer.serialize(&mut buf).unwrap(); + + let parsed = TestAnalytics::parse(&buf, DAY).unwrap(); + let mut writer = TestAnalyticsWriter::from_existing_format(&parsed).unwrap(); + + let was_rewritten = writer.rewrite(2, DAY, Some(0)).unwrap(); + assert!(!was_rewritten); + + let was_rewritten = writer.rewrite(7, DAY, Some(0)).unwrap(); + assert!(was_rewritten); + + let mut buf = vec![]; + writer.serialize(&mut buf).unwrap(); + + let parsed = TestAnalytics::parse(&buf, DAY).unwrap(); + + // nothing garbage collected yet, + // we should have data in the "yesterday" bucket + let mut tests = parsed.tests(1..2, None).unwrap(); + let abc = tests.next().unwrap().unwrap(); + assert_eq!(abc.name().unwrap(), "abc"); + let aggregates = abc.aggregates(); + assert_eq!(aggregates.total_pass_count, 1); + assert_eq!(aggregates.avg_duration, 1.0); + assert!(tests.next().is_none()); + + let mut writer = TestAnalyticsWriter::from_existing_format(&parsed).unwrap(); + + let was_rewritten = writer.rewrite(2, 3 * DAY, Some(0)).unwrap(); + assert!(was_rewritten); + + let mut buf = vec![]; + writer.serialize(&mut buf).unwrap(); + + let parsed = TestAnalytics::parse(&buf, 3 * DAY).unwrap(); + let mut tests = parsed.tests(0..60, None).unwrap(); + + // the test was 
garbage collected + assert!(tests.next().is_none()); + } + + #[test] + fn test_flags() { + let test = test(); + + let mut writer = TestAnalyticsWriter::new(2); + + let mut session = writer.start_session(0, CommitHash::default(), &["flag-a"]); + session.insert(&test); + let mut session = writer.start_session(0, CommitHash::default(), &["flag-b"]); + session.insert(&test); + + let mut buf = vec![]; + writer.serialize(&mut buf).unwrap(); + + let parsed = TestAnalytics::parse(&buf, DAY).unwrap(); + let mut tests = parsed.tests(0..60, None).unwrap(); + + // we get the test twice, with two different flags + let abc = tests.next().unwrap().unwrap(); + assert_eq!(abc.name().unwrap(), "abc"); + assert_eq!(abc.flags().unwrap(), &["flag-a"]); + + let abc = tests.next().unwrap().unwrap(); + assert_eq!(abc.name().unwrap(), "abc"); + assert_eq!(abc.flags().unwrap(), &["flag-b"]); + + assert!(tests.next().is_none()); + + // if we filter for flags, we get only matching tests: + let mut tests = parsed.tests(0..60, Some(&["flag-a"])).unwrap(); + + let abc = tests.next().unwrap().unwrap(); + assert_eq!(abc.name().unwrap(), "abc"); + assert_eq!(abc.flags().unwrap(), &["flag-a"]); + assert!(tests.next().is_none()); + + let mut tests = parsed.tests(0..60, Some(&["non-existing"])).unwrap(); + assert!(tests.next().is_none()); + } + + #[test] + fn test_historic_data() { + let test = test(); + + let mut writer = TestAnalyticsWriter::new(7); + + let mut session = writer.start_session(3 * DAY, CommitHash::default(), &[]); + session.insert(&test); + // insert data older than what is already in the file + let mut session = writer.start_session(DAY, CommitHash::default(), &[]); + session.insert(&test); + + let mut buf = vec![]; + writer.serialize(&mut buf).unwrap(); + + let parsed = TestAnalytics::parse(&buf, 4 * DAY).unwrap(); + + // we do not have any test data for "today" + let mut tests = parsed.tests(0..1, None).unwrap(); + assert!(tests.next().is_none()); + + // when filtering for "yesterday", we get valid data + let mut tests = parsed.tests(1..2, None).unwrap(); + let abc = tests.next().unwrap().unwrap(); + assert_eq!(abc.name().unwrap(), "abc"); + assert!(tests.next().is_none()); + + // also when filtering for two days prior to that + let mut tests = parsed.tests(2..4, None).unwrap(); + let abc = tests.next().unwrap().unwrap(); + assert_eq!(abc.name().unwrap(), "abc"); + assert!(tests.next().is_none()); + + // but not when going further back in time + let mut tests = parsed.tests(5..7, None).unwrap(); + assert!(tests.next().is_none()); + } + + #[test] + fn test_commit_hashes() { + let mut test = test(); + test.outcome = Outcome::Failure; + let commit_1 = CommitHash([0; 20]); + let commit_2 = CommitHash([1; 20]); + let commit_3 = CommitHash([2; 20]); + let commit_4 = CommitHash([3; 20]); + + let mut writer = TestAnalyticsWriter::new(7); + + let mut session = writer.start_session(DAY, commit_1, &[]); + session.insert(&test); + + let mut buf1 = vec![]; + writer.serialize(&mut buf1).unwrap(); + + let mut writer = TestAnalyticsWriter::new(7); + + let mut session = writer.start_session(DAY, commit_2, &[]); + session.insert(&test); + let mut session = writer.start_session(2 * DAY, commit_3, &[]); + session.insert(&test); + let mut session = writer.start_session(3 * DAY, commit_4, &[]); + session.insert(&test); + + let mut buf2 = vec![]; + writer.serialize(&mut buf2).unwrap(); + + let parsed1 = TestAnalytics::parse(&buf1, 3 * DAY).unwrap(); + let parsed2 = TestAnalytics::parse(&buf2, 3 * DAY).unwrap(); + + let merged = 
+        let merged = TestAnalyticsWriter::merge(&parsed1, &parsed2).unwrap();
+        let mut buf = vec![];
+        merged.serialize(&mut buf).unwrap();
+
+        let parsed = TestAnalytics::parse(&buf, 3 * DAY).unwrap();
+        let mut writer = TestAnalyticsWriter::from_existing_format(&parsed).unwrap();
+        let was_rewritten = writer.rewrite(14, 3 * DAY, Some(0)).unwrap();
+        assert!(was_rewritten);
+        let mut buf = vec![];
+        writer.serialize(&mut buf).unwrap();
+
+        let parsed = TestAnalytics::parse(&buf, 3 * DAY).unwrap();
+
+        let mut tests = parsed.tests(0..1, None).unwrap();
+        let abc = tests.next().unwrap().unwrap();
+        assert_eq!(abc.aggregates().failing_commits, 1); // commit 4
+        assert!(tests.next().is_none());
+
+        let mut tests = parsed.tests(2..3, None).unwrap();
+        let abc = tests.next().unwrap().unwrap();
+        assert_eq!(abc.aggregates().failing_commits, 2); // commit 1, commit 2
+        assert!(tests.next().is_none());
+
+        let mut tests = parsed.tests(0..60, None).unwrap();
+        let abc = tests.next().unwrap().unwrap();
+        assert_eq!(abc.aggregates().failing_commits, 4); // commit 1 - 4
+        assert!(tests.next().is_none());
+    }
+}
diff --git a/src/binary/raw.rs b/src/binary/raw.rs
new file mode 100644
index 0000000..29c937a
--- /dev/null
+++ b/src/binary/raw.rs
@@ -0,0 +1,83 @@
+use watto::Pod;
+
+/// The magic file preamble, encoded as little-endian `CCTA`.
+pub const TA_MAGIC: u32 = u32::from_le_bytes(*b"CCTA");
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+#[repr(C)]
+pub struct Header {
+    /// The file magic representing the file format and endianness.
+    pub magic: u32,
+    /// The file format version.
+    pub version: u32,
+    /// Timestamp when the file was last touched.
+    pub timestamp: u32,
+    /// Number of tests within the file.
+    pub num_tests: u32,
+    /// Number of days worth of aggregated data.
+    pub num_days: u32,
+    /// Length of the `FlagsSet` table.
+    pub flags_set_len: u32,
+    /// Length of the `CommitHashesSet` table.
+    pub commithashes_bytes: u32,
+    /// Length of the string table.
+    pub string_bytes: u32,
+}
+unsafe impl Pod for Header {}
+
+#[derive(Debug, Clone, Copy)]
+#[repr(C)]
+pub struct Test {
+    /// Offset of the Testsuite name within the string table.
+    pub testsuite_offset: u32,
+    /// Offset of the Test name within the string table.
+    pub name_offset: u32,
+    /// Offset of the Flag Set within the `FlagsSet` table.
+    pub flag_set_offset: u32,
+    /// The number of valid data entries.
+    pub valid_data: u32,
+}
+unsafe impl Pod for Test {}
+
+#[derive(Debug, Clone, Copy, Default)]
+#[repr(C)]
+pub struct TestData {
+    pub last_timestamp: u32,
+    pub last_duration: f32,
+
+    pub total_pass_count: u16,
+    pub total_fail_count: u16,
+    pub total_skip_count: u16,
+    pub total_flaky_fail_count: u16,
+    pub total_duration: f32,
+
+    pub failing_commits_set: u32,
+}
+unsafe impl Pod for TestData {}
+
+#[derive(Debug, Clone, Copy, Default, Hash, PartialEq, Eq, PartialOrd, Ord)]
+#[repr(C)]
+pub struct CommitHash(pub [u8; 20]);
+unsafe impl Pod for CommitHash {}
+
+#[cfg(test)]
+mod tests {
+    use std::mem;
+
+    use super::*;
+
+    #[test]
+    fn test_sizeof() {
+        assert_eq!(mem::size_of::<Header>(), 32);
+        assert_eq!(mem::align_of::<Header>(), 4);
+
+        assert_eq!(mem::size_of::<Test>(), 16);
+        assert_eq!(mem::align_of::<Test>(), 4);
+
+        assert_eq!(mem::size_of::<TestData>(), 24);
+        assert_eq!(mem::align_of::<TestData>(), 4);
+
+        assert_eq!(mem::size_of::<CommitHash>(), 20);
+        assert_eq!(mem::align_of::<CommitHash>(), 1);
+    }
+}
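(Editor's note, for orientation: `serialize` in writer.rs below writes these records in the order `Header`, then `num_tests` × `Test`, then `num_tests * num_days` × `TestData`, followed by the `FlagsSet` table, the `CommitHashesSet` bytes, and finally the string table.)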
diff --git a/src/binary/timestamps.rs b/src/binary/timestamps.rs
new file mode 100644
index 0000000..f9610b3
--- /dev/null
+++ b/src/binary/timestamps.rs
@@ -0,0 +1,101 @@
+use std::ops::Range;
+
+/// Seconds in a day.
+pub const DAY: u32 = 24 * 60 * 60;
+
+/// Calculates the offset (in days / indices) between
+/// the "saved" timestamp and "now".
+pub fn offset_from_today(timestamp_saved: u32, timestamp_now: u32) -> usize {
+    let days_saved = timestamp_saved / DAY;
+    let days_now = timestamp_now / DAY;
+
+    days_now as usize - days_saved as usize
+}
+
+/// Shifts `data` towards the back by `today_offset` (if non-zero),
+/// filling the vacated front entries with their `Default` value.
+pub fn shift_data<T: Copy + Default>(data: &mut [T], mut today_offset: usize) {
+    if today_offset == 0 {
+        return;
+    }
+    today_offset = today_offset.min(data.len());
+
+    let slice_end = data.len() - today_offset;
+    data.copy_within(0..slice_end, today_offset);
+
+    let begin = &mut data[0..today_offset];
+    begin.fill_with(Default::default);
+}
+
+/// This adjusts the `desired_range` to select the right subset of `data_range`
+/// so that it matches the days we want to select.
+///
+/// The `desired_range` always starts at "today" (0) and goes into the past.
+/// So a range `0..2` (exclusive) would select "today" (0) and "yesterday" (1).
+///
+/// To give an example using calendar days, our data, offset, desired and resulting
+/// ranges may look like this:
+/// ```compile_fail
+/// # // ^ `compile_fail` because this is private :-(
+/// # use test_results_parser::binary::timestamps::adjust_selection_range;
+/// let data_range = 20..24; // representing data from 2024-11-21 to 2024-11-18
+/// // … | 2024-11-21 | 2024-11-20 | 2024-11-19 | 2024-11-18 | …
+/// //     ^- 20                                  ^- 23
+///
+/// let today_offset = 1;
+/// // … | 2024-11-21 | …
+/// //     ^ today
+///
+/// let desired_range = 0..2; // today and yesterday
+///
+/// let resulting_range = adjust_selection_range(data_range, desired_range, today_offset);
+/// assert_eq!(resulting_range, 20..21);
+/// // … | 2024-11-21 | 2024-11-20 | …
+/// //     ^- 20        ^- 21
+/// ```
+pub fn adjust_selection_range(
+    data_range: Range<usize>,
+    desired_range: Range<usize>,
+    today_offset: usize,
+) -> Range<usize> {
+    let range_start = (data_range.start + desired_range.start).saturating_sub(today_offset);
+    let range_end = (data_range.start + desired_range.end).saturating_sub(today_offset);
+    let range_start = range_start.min(data_range.end).max(data_range.start);
+    let range_end = range_end.min(data_range.end).max(data_range.start);
+    range_start..range_end
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_day_offsets() {
+        let offset = offset_from_today(0, DAY);
+        assert_eq!(offset, 1);
+
+        let offset = offset_from_today(0, 7 * DAY);
+        assert_eq!(offset, 7);
+    }
+
+    #[test]
+    fn test_range_adjustment() {
+        let range = adjust_selection_range(0..60, 0..7, 0);
+        assert_eq!(range, 0..7);
+
+        let range = adjust_selection_range(0..7, 0..60, 0);
+        assert_eq!(range, 0..7);
+
+        let range = adjust_selection_range(20..28, 0..60, 2);
+        assert_eq!(range, 20..28);
+    }
+
+    #[test]
+    fn test_shift_data() {
+        let mut data = vec![1, 2, 3];
+        shift_data(&mut data, 0);
+        assert_eq!(&data, &[1, 2, 3]);
+
+        shift_data(&mut data, 1);
+        assert_eq!(&data, &[0, 1, 2]);
+    }
+}
diff --git a/src/binary/writer.rs b/src/binary/writer.rs
new file mode 100644
index 0000000..bb27b88
--- /dev/null
+++ b/src/binary/writer.rs
@@ -0,0 +1,466 @@
+use std::collections::{hash_map, HashMap};
+use std::io::Write;
+use std::mem;
+
+use commithashes_set::CommitHashesSet;
+use flags_set::FlagsSet;
+use indexmap::IndexMap;
+use raw::TestData;
+use timestamps::{adjust_selection_range, offset_from_today, shift_data};
+use watto::{Pod, StringTable};
+
+use crate::testrun;
+
+use super::*;
+
+pub struct InsertSession<'writer> {
+    writer: &'writer mut TestAnalyticsWriter,
+
+    timestamp: u32,
+    commit_hash: raw::CommitHash,
+    flag_set_offset: u32,
+}
+
+impl InsertSession<'_> {
+    /// Writes the data for the given [`Testrun`](testrun::Testrun) into the
+    /// underlying [`TestAnalyticsWriter`].
+    pub fn insert(&mut self, test: &testrun::Testrun) {
+        let testsuite_offset = self.writer.string_table.insert(&test.testsuite) as u32;
+        let name_offset = self.writer.string_table.insert(&test.name) as u32;
+        let key = TestKey {
+            testsuite_offset,
+            name_offset,
+            flag_set_offset: self.flag_set_offset,
+        };
+        let value = raw::Test {
+            testsuite_offset,
+            name_offset,
+            flag_set_offset: self.flag_set_offset,
+            valid_data: 1,
+        };
+        let (idx, replaced) = self.writer.tests.insert_full(key, value);
+
+        let mut data_idx = idx * self.writer.num_days;
+        if replaced.is_none() {
+            let expected_size = self.writer.tests.len() * self.writer.num_days;
+            self.writer
+                .testdata
+                .resize_with(expected_size, TestData::default);
+        } else {
+            let latest_timestamp = self.writer.testdata[data_idx].last_timestamp;
+
+            if latest_timestamp < self.timestamp {
+                // we are inserting newer data, so shift the existing data around
+                let today_offset = offset_from_today(latest_timestamp, self.timestamp);
+
+                let range = data_idx..data_idx + self.writer.num_days;
+                shift_data(&mut self.writer.testdata[range], today_offset);
+                extend_valid_data(
+                    &mut self.writer.tests[idx].valid_data,
+                    today_offset,
+                    self.writer.num_days,
+                );
+            } else {
+                // otherwise, we are inserting historic data, so adjust our `data_idx` accordingly
+                let today_offset = offset_from_today(self.timestamp, latest_timestamp);
+                if today_offset >= self.writer.num_days {
+                    return;
+                }
+                data_idx += today_offset;
+                self.writer.tests[idx].valid_data = self.writer.tests[idx]
+                    .valid_data
+                    .max(1 + today_offset as u32);
+            }
+        }
+
+        let testdata = &mut self.writer.testdata[data_idx];
+        testdata.total_duration += test.duration.unwrap_or_default() as f32;
+
+        if testdata.last_timestamp <= self.writer.timestamp {
+            testdata.last_timestamp = self.writer.timestamp;
+            testdata.last_duration = test.duration.unwrap_or_default() as f32;
+        }
+
+        match test.outcome {
+            testrun::Outcome::Skip => testdata.total_skip_count += 1,
+            testrun::Outcome::Pass => testdata.total_pass_count += 1,
+            testrun::Outcome::Error | testrun::Outcome::Failure => {
+                testdata.total_fail_count += 1;
+                testdata.failing_commits_set = self
+                    .writer
+                    .commithashes_set
+                    .append_intersection(testdata.failing_commits_set, &[self.commit_hash]);
+            }
+        }
+    }
+}
+
+fn extend_valid_data(valid_data: &mut u32, offset: usize, num_days: usize) {
+    *valid_data = (*valid_data as usize + offset).min(num_days) as u32;
+}
+
+#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
+struct TestKey {
+    pub testsuite_offset: u32,
+    pub name_offset: u32,
+    pub flag_set_offset: u32,
+}
+
+/// The [`TestAnalytics`] File Writer.
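+///
+/// A minimal usage sketch (editor's addition, mirroring the tests in this patch;
+/// the 7-day window, zero timestamp and flag name are arbitrary, and `testrun`
+/// stands for any parsed [`Testrun`](testrun::Testrun)):
+/// ```ignore
+/// let mut writer = TestAnalyticsWriter::new(7);
+/// let mut session = writer.start_session(0, raw::CommitHash::default(), &["flag-a"]);
+/// session.insert(&testrun);
+///
+/// let mut buf = vec![];
+/// writer.serialize(&mut buf).unwrap();
+/// ```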
+#[derive(Debug)]
+pub struct TestAnalyticsWriter {
+    num_days: usize,
+
+    string_table: StringTable,
+    flags_set: FlagsSet<'static>,
+    commithashes_set: CommitHashesSet,
+
+    timestamp: u32,
+
+    tests: IndexMap<TestKey, raw::Test>,
+    testdata: Vec<raw::TestData>,
+}
+
+impl TestAnalyticsWriter {
+    /// Creates a new Writer.
+    pub fn new(num_days: usize) -> Self {
+        Self {
+            num_days,
+
+            string_table: StringTable::default(),
+            flags_set: FlagsSet::default(),
+            commithashes_set: CommitHashesSet::new(),
+
+            timestamp: 0,
+
+            tests: IndexMap::new(),
+            testdata: vec![],
+        }
+    }
+
+    /// Creates an insertion session which allows inserting test run results.
+    pub fn start_session(
+        &mut self,
+        timestamp: u32,
+        commit_hash: raw::CommitHash,
+        flags: &[&str],
+    ) -> InsertSession<'_> {
+        self.timestamp = self.timestamp.max(timestamp);
+        let flag_set_offset = self.flags_set.insert(&mut self.string_table, flags);
+
+        InsertSession {
+            writer: self,
+            timestamp,
+            flag_set_offset,
+            commit_hash,
+        }
+    }
+
+    /// Turns an existing parsed [`TestAnalytics`] file into a writer.
+    pub fn from_existing_format(data: &TestAnalytics) -> Result<Self, TestAnalyticsError> {
+        let tests = IndexMap::from_iter(data.tests.iter().map(|test| {
+            let key = TestKey {
+                testsuite_offset: test.testsuite_offset,
+                name_offset: test.name_offset,
+                flag_set_offset: test.flag_set_offset,
+            };
+            (key, *test)
+        }));
+
+        let string_table = StringTable::from_bytes(data.string_bytes)
+            .map_err(|_| TestAnalyticsErrorKind::InvalidStringReference)?;
+        let flags_set = data.flags_set.to_owned();
+        let commithashes_set = CommitHashesSet::from_bytes(data.commithashes_bytes)?;
+
+        Ok(Self {
+            num_days: data.header.num_days as usize,
+
+            string_table,
+            flags_set,
+            commithashes_set,
+
+            timestamp: data.timestamp,
+
+            tests,
+            testdata: data.testdata.into(),
+        })
+    }
+
+    /// Merges the two parsed [`TestAnalytics`] files into a writer.
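+    ///
+    /// Editor's sketch of the intended flow (mirroring the merge test in this
+    /// patch; `buf1`/`buf2` are previously serialized files and `now` is the
+    /// reference timestamp):
+    /// ```ignore
+    /// let parsed1 = TestAnalytics::parse(&buf1, now).unwrap();
+    /// let parsed2 = TestAnalytics::parse(&buf2, now).unwrap();
+    /// let merged = TestAnalyticsWriter::merge(&parsed1, &parsed2).unwrap();
+    /// ```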
+    pub fn merge(a: &TestAnalytics, b: &TestAnalytics) -> Result<Self, TestAnalyticsError> {
+        // merging the smaller into the larger is usually the more performant thing to do:
+        let (larger, smaller) =
+            if (b.header.num_days, b.header.num_tests) > (a.header.num_days, a.header.num_tests) {
+                (b, a)
+            } else {
+                (a, b)
+            };
+
+        let mut writer = Self::from_existing_format(larger)?;
+        writer.timestamp = a.timestamp.max(b.timestamp);
+
+        // we just assume a 75% overlap, or 25% new unique entries:
+        let expected_new = smaller.header.num_tests as usize / 4;
+        writer.tests.reserve(expected_new);
+        let expected_reserve = expected_new * writer.num_days;
+        writer.testdata.reserve(expected_reserve);
+
+        let smaller_flags = smaller.flags_set.iter(smaller.string_bytes);
+        let mut flags_mapping = HashMap::with_capacity(smaller_flags.len());
+        for res in smaller_flags {
+            let (smaller_offset, flags) = res?;
+            let larger_offset = writer.flags_set.insert(&mut writer.string_table, &flags);
+            flags_mapping.insert(smaller_offset, larger_offset);
+        }
+
+        for (smaller_idx, test) in smaller.tests.iter().enumerate() {
+            let testsuite = StringTable::read(smaller.string_bytes, test.testsuite_offset as usize)
+                .map_err(|_| TestAnalyticsErrorKind::InvalidStringReference)?;
+            let name = StringTable::read(smaller.string_bytes, test.name_offset as usize)
+                .map_err(|_| TestAnalyticsErrorKind::InvalidStringReference)?;
+
+            let testsuite_offset = writer.string_table.insert(testsuite) as u32;
+            let name_offset = writer.string_table.insert(name) as u32;
+            let flag_set_offset = *flags_mapping
+                .get(&test.flag_set_offset)
+                .ok_or(TestAnalyticsErrorKind::InvalidFlagSetReference)?;
+
+            let key = TestKey {
+                testsuite_offset,
+                name_offset,
+                flag_set_offset,
+            };
+            let value = raw::Test {
+                testsuite_offset,
+                name_offset,
+                flag_set_offset,
+                valid_data: 1,
+            };
+            let (idx, replaced) = writer.tests.insert_full(key, value);
+
+            let data_idx = idx * writer.num_days;
+            let smaller_idx = smaller_idx * smaller.header.num_days as usize;
+            let smaller_timestamp = smaller.testdata[smaller_idx].last_timestamp;
+
+            let larger_timestamp = if replaced.is_none() {
+                let expected_size = writer.tests.len() * writer.num_days;
+                writer
+                    .testdata
+                    .resize_with(expected_size, TestData::default);
+
+                smaller_timestamp
+            } else {
+                writer.testdata[data_idx].last_timestamp
+            };
+
+            let (smaller_range, today_offset) = if smaller_timestamp > larger_timestamp {
+                // smaller has more recent data buckets, so we shift things around:
+                let today_offset = offset_from_today(larger_timestamp, smaller_timestamp);
+                let range = data_idx..data_idx + writer.num_days;
+
+                shift_data(&mut writer.testdata[range], today_offset);
+                extend_valid_data(
+                    &mut writer.tests[idx].valid_data,
+                    today_offset,
+                    writer.num_days,
+                );
+
+                let smaller_range = adjust_selection_range(
+                    smaller_idx..smaller_idx + smaller.header.num_days as usize,
+                    0..writer.num_days,
+                    today_offset,
+                );
+                (smaller_range, 0)
+            } else {
+                let today_offset = offset_from_today(smaller_timestamp, larger_timestamp);
+                let smaller_range = adjust_selection_range(
+                    smaller_idx..smaller_idx + smaller.header.num_days as usize,
+                    0..writer.num_days,
+                    today_offset,
+                );
+
+                (smaller_range, today_offset)
+            };
+
+            let overlap_len = smaller_range.end - smaller_range.start;
+            let idx_start = data_idx + today_offset;
+            let larger_range = idx_start..idx_start + overlap_len;
+
+            writer.tests[idx].valid_data = writer.tests[idx]
+                .valid_data
+                .max((larger_range.end - data_idx) as u32)
+                .min(writer.num_days as u32);
+
+            let commithashes_bytes = smaller.commithashes_bytes;
+
+            let larger_data = &mut writer.testdata[larger_range];
+            let smaller_data = &smaller.testdata[smaller_range];
+
+            for (larger, smaller) in larger_data.iter_mut().zip(smaller_data) {
+                larger.total_pass_count += smaller.total_pass_count;
+                larger.total_fail_count += smaller.total_fail_count;
+                larger.total_skip_count += smaller.total_skip_count;
+                larger.total_flaky_fail_count += smaller.total_flaky_fail_count;
+                larger.total_duration += smaller.total_duration;
+
+                if smaller.last_timestamp >= larger.last_timestamp {
+                    larger.last_timestamp = smaller.last_timestamp;
+                    larger.last_duration = smaller.last_duration;
+                }
+
+                let smaller_failing_commits =
+                    CommitHashesSet::read_raw(commithashes_bytes, smaller.failing_commits_set)?;
+                if !smaller_failing_commits.is_empty() {
+                    larger.failing_commits_set = writer
+                        .commithashes_set
+                        .append_intersection(larger.failing_commits_set, smaller_failing_commits);
+                }
+            }
+        }
+
+        Ok(writer)
+    }
+
+    /// Does garbage collection by rewriting test records and throwing away those with expired data.
+    ///
+    /// This also makes sure that the data records are truncated or extended to `num_days`.
+    /// In case no `num_days` adjustment is necessary, this will only rewrite all records when the
+    /// number of expired records exceeds `garbage_threshold`, which defaults to 25% of the records.
+    pub fn rewrite(
+        &mut self,
+        mut num_days: usize,
+        timestamp: u32,
+        garbage_threshold: Option<usize>,
+    ) -> Result<bool, TestAnalyticsError> {
+        self.timestamp = self.timestamp.max(timestamp);
+
+        let needs_resize = num_days != self.num_days;
+        let threshold = garbage_threshold.unwrap_or(self.tests.len() / 4);
+        let record_liveness: Vec<_> = (0..self.tests.len())
+            .map(|idx| {
+                let data_idx = idx * self.num_days;
+                let test_timestamp = self.testdata[data_idx].last_timestamp;
+                let today_offset = offset_from_today(test_timestamp, self.timestamp);
+                today_offset < num_days
+            })
+            .collect();
+
+        let live_records = record_liveness.iter().filter(|live| **live).count();
+        let dead_records = self.tests.len() - live_records;
+
+        if !(needs_resize || dead_records > threshold) {
+            return Ok(false);
+        }
+
+        mem::swap(&mut num_days, &mut self.num_days);
+        let string_table = mem::take(&mut self.string_table);
+        let flags_set = mem::take(&mut self.flags_set);
+        let tests = mem::take(&mut self.tests);
+        let testdata = mem::take(&mut self.testdata);
+        let commithashes_set = mem::replace(&mut self.commithashes_set, CommitHashesSet::new());
+
+        let mut flags_mapping = HashMap::with_capacity(flags_set.map.len());
+
+        let expected_size = live_records * self.num_days;
+        self.tests.reserve(live_records);
+        self.testdata.reserve(expected_size);
+
+        for ((old_idx, test), record_live) in tests.values().enumerate().zip(record_liveness) {
+            if !record_live {
+                continue;
+            }
+
+            let flag_set_offset = match flags_mapping.entry(test.flag_set_offset) {
+                hash_map::Entry::Occupied(occupied_entry) => *occupied_entry.get(),
+                hash_map::Entry::Vacant(vacant_entry) => {
+                    let flags = flags_set.resolve(string_table.as_bytes(), test.flag_set_offset)?;
+                    let flag_set_offset = self.flags_set.insert(&mut self.string_table, &flags);
+
+                    *vacant_entry.insert(flag_set_offset)
+                }
+            };
+
+            let testsuite =
+                StringTable::read(string_table.as_bytes(), test.testsuite_offset as usize)
+                    .map_err(|_| TestAnalyticsErrorKind::InvalidStringReference)?;
+            let name = StringTable::read(string_table.as_bytes(), test.name_offset as usize)
+                .map_err(|_| TestAnalyticsErrorKind::InvalidStringReference)?;
+
+            let testsuite_offset = self.string_table.insert(testsuite) as u32;
+            let name_offset = self.string_table.insert(name) as u32;
+            let key = TestKey {
+                testsuite_offset,
+                name_offset,
+                flag_set_offset,
+            };
+            let value = raw::Test {
+                testsuite_offset,
+                name_offset,
+                flag_set_offset,
+                valid_data: test.valid_data.max(num_days as u32),
+            };
+            let (_new_idx, replaced) = self.tests.insert_full(key, value);
+            assert!(replaced.is_none()); // the records are already unique, and we re-insert them
+
+            let overlap_days = num_days.min(self.num_days);
+            let old_idx = old_idx * num_days;
+
+            let old_range = old_idx..old_idx + overlap_days;
+            self.testdata.extend(testdata[old_range].iter().map(|data| {
+                let failing_commits = commithashes_set.read(data.failing_commits_set);
+                let failing_commits_set = self
+                    .commithashes_set
+                    .append_intersection(0, failing_commits);
+
+                TestData {
+                    failing_commits_set,
+                    ..*data
+                }
+            }));
+
+            let expected_size = self.tests.len() * self.num_days;
+            self.testdata.resize_with(expected_size, TestData::default);
+        }
+
+        Ok(true)
+    }
+
+    /// Serialize the converted data.
+    ///
+    /// This writes the [`TestAnalytics`] binary format into the given [`Write`].
+    pub fn serialize<W: Write>(self, writer: &mut W) -> std::io::Result<()> {
+        let mut writer = watto::Writer::new(writer);
+
+        let flags_set_table = self.flags_set.table;
+        let commithashes_bytes = self.commithashes_set.into_bytes();
+        let string_bytes = self.string_table.into_bytes();
+
+        let header = raw::Header {
+            magic: raw::TA_MAGIC,
+            version: super::format::TA_VERSION,
+            timestamp: self.timestamp,
+
+            num_days: self.num_days as u32,
+            num_tests: self.tests.len() as u32,
+
+            flags_set_len: flags_set_table.len() as u32,
+            commithashes_bytes: commithashes_bytes.len() as u32,
+            string_bytes: string_bytes.len() as u32,
+        };
+
+        writer.write_all(header.as_bytes())?;
+
+        for test in self.tests.into_values() {
+            writer.write_all(test.as_bytes())?;
+        }
+
+        writer.write_all(self.testdata.as_bytes())?;
+
+        writer.write_all(flags_set_table.as_bytes())?;
+
+        writer.write_all(&commithashes_bytes)?;
+        writer.write_all(&string_bytes)?;
+
+        Ok(())
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
index 7f008ba..b1ee782 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,11 +1,14 @@
 use pyo3::exceptions::PyException;
 use pyo3::prelude::*;
 
+pub mod binary;
 mod compute_name;
 mod failure_message;
 mod junit;
 mod testrun;
 
+pub use testrun::{Framework, Outcome, Testrun};
+
 pyo3::create_exception!(test_results_parser, ParserError, PyException);
 pyo3::create_exception!(test_results_parser, ComputeNameError, PyException);
 
@@ -22,5 +25,10 @@ fn test_results_parser(py: Python, m: &Bound<PyModule>) -> PyResult<()> {
     m.add_function(wrap_pyfunction!(failure_message::build_message, m)?)?;
     m.add_function(wrap_pyfunction!(failure_message::escape_message, m)?)?;
     m.add_function(wrap_pyfunction!(failure_message::shorten_file_paths, m)?)?;
+
+    m.add_class::<Framework>()?;
+    m.add_class::<Outcome>()?;
+    m.add_class::<Testrun>()?;
+
     Ok(())
 }
diff --git a/src/testrun.rs b/src/testrun.rs
index 274aec6..a9a8f36 100644
--- a/src/testrun.rs
+++ b/src/testrun.rs
@@ -1,7 +1,7 @@
 use std::fmt::Display;
 
 use pyo3::class::basic::CompareOp;
-use pyo3::{prelude::*, pyclass};
+use pyo3::prelude::*;
 
 #[derive(Clone, Copy, Debug, PartialEq)]
 #[pyclass(eq, eq_int)]
diff --git a/tests/test_aggregation.py b/tests/test_aggregation.py
new file mode 100644
index 0000000..9c0e47b
--- /dev/null
+++ b/tests/test_aggregation.py
@@ -0,0 +1,45 @@
+from datetime import datetime, timezone
+
+from test_results_parser import (
+    parse_junit_xml,
+    AggregationReader,
+    BinaryFormatWriter,
+)
+
+
+def test_aggregation():
+    with open("./tests/junit.xml", "rb") as f:
+        junit_file = f.read()
+    parsed = parse_junit_xml(junit_file)
+
+    now = int(datetime.now(timezone.utc).timestamp())
+
+    writer = BinaryFormatWriter()
+    writer.add_testruns(
+        timestamp=now,
+        commit_hash="e9fcd08652d091fa0c8d28e323c24fb0f4acf249",
+        flags=["upload", "flags"],
+        testruns=parsed.testruns,
+    )
+
+    serialized = writer.serialize()
+    reader = AggregationReader(serialized, now)
+
+    tests = reader.get_test_aggregates(0, 2)
+    for test in tests:
+        test_dict = {
+            "name": test.name,
+            "test_id": test.test_id,  # TODO
+            "testsuite": test.testsuite,
+            "flags": test.flags,
+            "failure_rate": test.failure_rate,
+            "flake_rate": test.flake_rate,
+            "updated_at": test.updated_at,  # TODO
+            "avg_duration": test.avg_duration,
+            "total_fail_count": test.total_fail_count,
+            "total_flaky_fail_count": test.total_flaky_fail_count,
+            "total_pass_count": test.total_pass_count,
+            "total_skip_count": test.total_skip_count,
+            "commits_where_fail": test.commits_where_fail,
+            "last_duration": test.last_duration,  # TODO
+        }
+        print(test_dict)
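+
+    # Editor's addition, hedged: basic sanity checks rather than golden values,
+    # since the exact aggregates depend on the ./tests/junit.xml fixture. This
+    # assumes `serialize()` returns `bytes` and `parsed.testruns` is a list.
+    assert isinstance(serialized, bytes)
+    assert len(parsed.testruns) > 0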