From 4891fc4e2ac69cd1344a39b22a59684ca6fd136c Mon Sep 17 00:00:00 2001 From: Michael Kirk Date: Thu, 25 Jan 2024 12:52:33 -0800 Subject: [PATCH] Use faster loop iter (despite clippy's advice) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit $ cargo bench --bench="*" rust -- --baseline=main-2024-01-25 direct (rust impl)/default time: [27.773 µs 27.806 µs 27.842 µs] change: [-4.9255% -4.6540% -4.3868%] (p = 0.00 < 0.05) Performance has improved. inverse (rust impl)/default time: [70.088 µs 70.139 µs 70.194 µs] change: [-9.8981% -9.7426% -9.5881%] (p = 0.00 < 0.05) Performance has improved. --- README.md | 42 ++++++++++++------------------------------ src/geodesic.rs | 18 ++++++++++++------ 2 files changed, 24 insertions(+), 36 deletions(-) diff --git a/README.md b/README.md index cddd543..bdc9019 100644 --- a/README.md +++ b/README.md @@ -86,35 +86,17 @@ cargo bench Which produces output like: ```text - Running target/release/deps/geodesic_benchmark-af6ba4f7be913514 -direct (c wrapper) time: [34.852 us 34.937 us 35.023 us] - change: [+1.1137% +1.7246% +2.2864%] (p = 0.00 < 0.05) - Performance has regressed. -Found 9 outliers among 100 measurements (9.00%) - 3 (3.00%) low mild - 6 (6.00%) high mild - -direct (rust impl) time: [48.862 us 48.959 us 49.059 us] - change: [+0.0149% +0.8003% +1.5464%] (p = 0.04 < 0.05) - Change within noise threshold. -Found 9 outliers among 100 measurements (9.00%) - 1 (1.00%) low mild - 4 (4.00%) high mild - 4 (4.00%) high severe - -inverse (c wrapper) time: [70.875 us 71.138 us 71.464 us] - change: [+0.6259% +1.1321% +1.6653%] (p = 0.00 < 0.05) - Change within noise threshold. -Found 8 outliers among 100 measurements (8.00%) - 1 (1.00%) high mild - 7 (7.00%) high severe - -inverse (rust impl) time: [103.66 us 104.07 us 104.58 us] - change: [-1.0415% -0.0086% +1.0291%] (p = 0.99 > 0.05) - No change in performance detected. 
-Found 7 outliers among 100 measurements (7.00%) - 1 (1.00%) low mild - 6 (6.00%) high severe +direct (c wrapper)/default + time: [24.055 µs 24.085 µs 24.117 µs] + +direct (rust impl)/default + time: [27.760 µs 27.810 µs 27.867 µs] + +inverse (c wrapper)/default + time: [46.461 µs 47.435 µs 48.557 µs] + +inverse (rust impl)/default + time: [70.488 µs 70.841 µs 71.356 µs] ``` -Showing that, at least in this benchmark, the Rust implementation is 40-50% slower than the c bindings. +Showing that, at least in this benchmark, the Rust implementation is 16-52% slower than the c bindings. diff --git a/src/geodesic.rs b/src/geodesic.rs index e6baae2..4bd981e 100644 --- a/src/geodesic.rs +++ b/src/geodesic.rs @@ -185,10 +185,13 @@ impl Geodesic { pub fn _C3f(&self, eps: f64, c: &mut [f64]) { let mut mult = 1.0; let mut o = 0; - for (l, c_item) in c.iter_mut().enumerate().take(self.GEODESIC_ORDER).skip(1) { - let m = self.GEODESIC_ORDER - l - 1; + // Clippy wants us to turn this into `c.iter_mut().enumerate().take(GEODESIC_ORDER).skip(1)` + // but benching (rust-1.75) shows that it would be slower. + #[allow(clippy::needless_range_loop)] + for l in 1..GEODESIC_ORDER { + let m = GEODESIC_ORDER - l - 1; mult *= eps; - *c_item = mult * geomath::polyval(m, &self._C3x[o..], eps); + c[l] = mult * geomath::polyval(m, &self._C3x[o..], eps); o += m + 1; } } @@ -196,9 +199,12 @@ impl Geodesic { pub fn _C4f(&self, eps: f64, c: &mut [f64]) { let mut mult = 1.0; let mut o = 0; - for (l, c_item) in c.iter_mut().enumerate().take(self.GEODESIC_ORDER) { - let m = self.GEODESIC_ORDER - l - 1; - *c_item = mult * geomath::polyval(m, &self._C4x[o..], eps); + // Clippy wants us to turn this into `c.iter_mut().enumerate().take(GEODESIC_ORDER)` + // but benching (rust-1.75) shows that it would be slower.
+ #[allow(clippy::needless_range_loop)] + for l in 0..GEODESIC_ORDER { + let m = GEODESIC_ORDER - l - 1; + c[l] = mult * geomath::polyval(m, &self._C4x[o..], eps); o += m + 1; mult *= eps; }