Skip to content

Commit

Permalink
Use faster loop iter (despite clippy's advice)
Browse files Browse the repository at this point in the history
    $ cargo bench --bench="*" rust --  --baseline=main-2024-01-25
    direct (rust impl)/default
                            time:   [27.773 µs 27.806 µs 27.842 µs]
                            change: [-4.9255% -4.6540% -4.3868%] (p = 0.00 < 0.05)
                            Performance has improved.

    inverse (rust impl)/default
                            time:   [70.088 µs 70.139 µs 70.194 µs]
                            change: [-9.8981% -9.7426% -9.5881%] (p = 0.00 < 0.05)
                            Performance has improved.
  • Loading branch information
michaelkirk committed Jan 25, 2024
1 parent 9f2b3b9 commit 4891fc4
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 36 deletions.
42 changes: 12 additions & 30 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,35 +86,17 @@ cargo bench
Which produces output like:

```text
Running target/release/deps/geodesic_benchmark-af6ba4f7be913514
direct (c wrapper) time: [34.852 us 34.937 us 35.023 us]
change: [+1.1137% +1.7246% +2.2864%] (p = 0.00 < 0.05)
Performance has regressed.
Found 9 outliers among 100 measurements (9.00%)
3 (3.00%) low mild
6 (6.00%) high mild
direct (rust impl) time: [48.862 us 48.959 us 49.059 us]
change: [+0.0149% +0.8003% +1.5464%] (p = 0.04 < 0.05)
Change within noise threshold.
Found 9 outliers among 100 measurements (9.00%)
1 (1.00%) low mild
4 (4.00%) high mild
4 (4.00%) high severe
inverse (c wrapper) time: [70.875 us 71.138 us 71.464 us]
change: [+0.6259% +1.1321% +1.6653%] (p = 0.00 < 0.05)
Change within noise threshold.
Found 8 outliers among 100 measurements (8.00%)
1 (1.00%) high mild
7 (7.00%) high severe
inverse (rust impl) time: [103.66 us 104.07 us 104.58 us]
change: [-1.0415% -0.0086% +1.0291%] (p = 0.99 > 0.05)
No change in performance detected.
Found 7 outliers among 100 measurements (7.00%)
1 (1.00%) low mild
6 (6.00%) high severe
direct (c wrapper)/default
time: [24.055 µs 24.085 µs 24.117 µs]
direct (rust impl)/default
time: [27.760 µs 27.810 µs 27.867 µs]
inverse (c wrapper)/default
time: [46.461 µs 47.435 µs 48.557 µs]
inverse (rust impl)/default
time: [70.488 µs 70.841 µs 71.356 µs]
```

Showing that, at least in this benchmark, the Rust implementation is 40-50% slower than the c bindings.
Showing that, at least in this benchmark, the Rust implementation is 16-52% slower than the C bindings.
18 changes: 12 additions & 6 deletions src/geodesic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -185,20 +185,26 @@ impl Geodesic {
pub fn _C3f(&self, eps: f64, c: &mut [f64]) {
let mut mult = 1.0;
let mut o = 0;
for (l, c_item) in c.iter_mut().enumerate().take(self.GEODESIC_ORDER).skip(1) {
let m = self.GEODESIC_ORDER - l - 1;
// Clippy wants us to turn this into `c.iter_mut().enumerate().take(GEODESIC_ORDER).skip(1)`
// but benching (rust-1.75) shows that it would be slower.
#[allow(clippy::needless_range_loop)]
for l in 1..GEODESIC_ORDER {
let m = GEODESIC_ORDER - l - 1;
mult *= eps;
*c_item = mult * geomath::polyval(m, &self._C3x[o..], eps);
c[l] = mult * geomath::polyval(m, &self._C3x[o..], eps);
o += m + 1;
}
}

pub fn _C4f(&self, eps: f64, c: &mut [f64]) {
let mut mult = 1.0;
let mut o = 0;
for (l, c_item) in c.iter_mut().enumerate().take(self.GEODESIC_ORDER) {
let m = self.GEODESIC_ORDER - l - 1;
*c_item = mult * geomath::polyval(m, &self._C4x[o..], eps);
// Clippy wants us to turn this into `c.iter_mut().enumerate().take(GEODESIC_ORDER)`
// but benching (rust-1.75) shows that it would be slower.
#[allow(clippy::needless_range_loop)]
for l in 0..GEODESIC_ORDER {
let m = GEODESIC_ORDER - l - 1;
c[l] = mult * geomath::polyval(m, &self._C4x[o..], eps);
o += m + 1;
mult *= eps;
}
Expand Down

0 comments on commit 4891fc4

Please sign in to comment.