Remove unnecessary allocations (#569)

image-rs · Apr 27, 2024 · 30ab68f · 30ab68f
1 parent 62401e8
commit 30ab68f
Show file tree

Hide file tree

Showing 5 changed files with 84 additions and 79 deletions.
diff --git a/src/filter/median.rs b/src/filter/median.rs
@@ -252,7 +252,7 @@ impl HistSet {
     fn new(num_channels: u8, expected_count: u32) -> HistSet {
         // Can't use vec![[0u32; 256], num_channels as usize]
         // because arrays of length > 32 aren't cloneable.
-        let mut data = vec![];
+        let mut data = Vec::with_capacity(num_channels as usize);
         for _ in 0..num_channels {
             data.push([0u32; 256]);
         }

diff --git a/src/filter/mod.rs b/src/filter/mod.rs
@@ -11,7 +11,7 @@ use image::{GenericImage, GenericImageView, GrayImage, ImageBuffer, Luma, Pixel,
 use crate::definitions::{Clamp, Image};
 use crate::integral_image::{column_running_sum, row_running_sum};
 use crate::map::{ChannelMap, WithChannel};
-use num::{abs, pow, Num};
+use num::Num;
 
 use std::cmp::{max, min};
 use std::f32;
@@ -59,90 +59,77 @@ pub fn bilateral_filter(
         (-0.5 * x.powi(2) / sigma_squared).exp()
     }
 
-    /// Effectively a meshgrid command with flattened outputs.
-    fn window_coords(window_size: u32) -> (Vec<i32>, Vec<i32>) {
-        let window_start = (-(window_size as f32) / 2.0).floor() as i32;
-        let window_end = (window_size as f32 / 2.0).floor() as i32 + 1;
-        let window_range = window_start..window_end;
-        let v = window_range.collect::<Vec<i32>>();
-        let cc: Vec<i32> = v.iter().cycle().take(v.len() * v.len()).cloned().collect();
-        let mut rr = Vec::new();
-        let window_range = window_start..window_end;
-        for i in window_range {
-            rr.append(&mut vec![i; (window_size + 1) as usize]);
-        }
-        (rr, cc)
-    }
-
     /// Create look-up table of Gaussian weights for color dimension.
     fn compute_color_lut(bins: u32, sigma: f32, max_value: f32) -> Vec<f32> {
-        let v = (0..bins as i32).collect::<Vec<i32>>();
         let step_size = max_value / bins as f32;
-        let vals = v.iter().map(|&x| x as f32 * step_size).collect::<Vec<_>>();
         let sigma_squared = sigma.powi(2);
-        let gauss_weights = vals
-            .iter()
-            .map(|&x| gaussian_weight(x, sigma_squared))
-            .collect::<Vec<_>>();
-        gauss_weights
+        (0..bins)
+            .map(|x| x as f32 * step_size)
+            .map(|x| gaussian_weight(x, sigma_squared))
+            .collect()
     }
 
     /// Create look-up table of weights corresponding to flattened 2-D Gaussian kernel.
     fn compute_spatial_lut(window_size: u32, sigma: f32) -> Vec<f32> {
-        let (rr, cc) = window_coords(window_size);
-        let mut gauss_weights = Vec::new();
-        let it = rr.iter().zip(cc.iter());
+        let window_start = (-(window_size as f32) / 2.0).floor() as i32;
+        let window_end = (window_size as f32 / 2.0).floor() as i32 + 1;
+        let window_range = window_start..window_end;
+
+        let cc = window_range.clone().cycle().take(window_range.len().pow(2));
+        let n = window_size as usize + 1;
+        let rr = window_range.flat_map(|i| std::iter::repeat(i).take(n));
+
         let sigma_squared = sigma.powi(2);
-        for (r, c) in it {
-            let dist = f32::sqrt(pow(*r as f32, 2) + pow(*c as f32, 2));
-            gauss_weights.push(gaussian_weight(dist, sigma_squared));
-        }
-        gauss_weights
+        rr.zip(cc)
+            .map(|(r, c)| {
+                let dist = ((r as f32).powi(2) + (c as f32).powi(2)).sqrt();
+                gaussian_weight(dist, sigma_squared)
+            })
+            .collect()
     }
 
-    let (width, height) = image.dimensions();
-    let mut out = ImageBuffer::new(width, height);
     let max_value = *image.iter().max().unwrap() as f32;
-    let n_bins: u32 = 255; // for color or > 8-bit, make n_bins a user input for tuning accuracy.
+    let n_bins = 255u32; // for color or > 8-bit, make n_bins a user input for tuning accuracy.
     let color_lut = compute_color_lut(n_bins, sigma_color, max_value);
     let color_dist_scale = n_bins as f32 / max_value;
     let max_color_bin = (n_bins - 1) as usize;
     let range_lut = compute_spatial_lut(window_size, sigma_spatial);
     let window_size = window_size as i32;
     let window_extent = (window_size - 1) / 2;
-    let height = height as i32;
-    let width = width as i32;
-    for row in 0..height {
-        for col in 0..width {
-            let mut total_val: f32 = 0.;
-            let mut total_weight: f32 = 0.;
-            let window_center_val = image.get_pixel(col as u32, row as u32)[0] as i32;
-            for window_row in -window_extent..window_extent + 1 {
-                let window_row_abs: i32 = row + window_row;
-                let window_row_abs: i32 = min(height - 1, max(0, window_row_abs)); // Wrap to edge.
-                let kr: i32 = window_row + window_extent;
-                for window_col in -window_extent..window_extent + 1 {
-                    let window_col_abs: i32 = col + window_col;
-                    let window_col_abs: i32 = min(width - 1, max(0, window_col_abs)); // Wrap to edge.
-                    let kc: i32 = window_col + window_extent;
-                    let range_bin = (kr * window_size + kc) as usize;
-                    let range_weight: f32 = range_lut[range_bin];
-                    let val: i32 =
-                        image.get_pixel(window_col_abs as u32, window_row_abs as u32)[0] as i32;
-                    let color_dist: i32 = abs(window_center_val - val);
-                    let color_bin = (color_dist as f32 * color_dist_scale) as usize;
-                    let color_bin: usize = min(color_bin, max_color_bin);
-                    let color_weight: f32 = color_lut[color_bin];
-                    let weight: f32 = range_weight * color_weight;
-                    total_val += val as f32 * weight;
-                    total_weight += weight;
-                }
+
+    let (width, height) = image.dimensions();
+    Image::from_fn(width, height, |col, row| {
+        let mut total_val = 0f32;
+        let mut total_weight = 0f32;
+        debug_assert!(col < width);
+        debug_assert!(row < height);
+        // Safety: `Image::from_fn` yields `col` in [0, width) and `row` in [0, height).
+        let window_center_val = unsafe { image.unsafe_get_pixel(col, row)[0] } as i32;
+
+        for window_row in -window_extent..window_extent + 1 {
+            let window_row_abs =
+                (row as i32 + window_row).clamp(0, height.saturating_sub(1) as i32) as u32;
+            let kr = window_row + window_extent;
+            for window_col in -window_extent..window_extent + 1 {
+                let window_col_abs =
+                    (col as i32 + window_col).clamp(0, width.saturating_sub(1) as i32) as u32;
+                debug_assert!(window_col_abs < width);
+                debug_assert!(window_row_abs < height);
+                // Safety: we clamped `window_row_abs` and `window_col_abs` to be in bounds.
+                let val = unsafe { image.unsafe_get_pixel(window_col_abs, window_row_abs)[0] };
+
+                let kc = window_col + window_extent;
+                let range_bin = (kr * window_size + kc) as usize;
+                let color_dist = (window_center_val - val as i32).abs() as f32;
+                let color_bin = ((color_dist * color_dist_scale) as usize).min(max_color_bin);
+                let weight = range_lut[range_bin] * color_lut[color_bin];
+                total_val += val as f32 * weight;
+                total_weight += weight;
             }
-            let new_val = (total_val / total_weight).round() as u8;
-            out.put_pixel(col as u32, row as u32, Luma([new_val]));
         }
-    }
-    out
+        let new_val = (total_val / total_weight).round() as u8;
+        Luma([new_val])
+    })
 }
 
 /// Convolves an 8bpp grayscale image with a kernel of width (2 * `x_radius` + 1)
@@ -575,6 +562,20 @@ mod tests {
         });
     }
 
+    #[test]
+    fn test_bilateral_filter() {
+        let image = gray_image!(
+            1, 2, 3;
+            4, 5, 6;
+            7, 8, 9);
+        let expect = gray_image!(
+            2, 3, 4;
+            5, 5, 6;
+            6, 7, 8);
+        let actual = bilateral_filter(&image, 3, 10., 3.);
+        assert_pixels_eq!(expect, actual);
+    }
+
     #[test]
     fn test_box_filter_handles_empty_images() {
         let _ = box_filter(&GrayImage::new(0, 0), 3, 3);

diff --git a/src/geometry.rs b/src/geometry.rs
@@ -185,14 +185,14 @@ where
 /// Finds the convex hull of a set of points, using the [Graham scan algorithm].
 ///
 /// [Graham scan algorithm]: https://en.wikipedia.org/wiki/Graham_scan
-pub fn convex_hull<T>(points_slice: &[Point<T>]) -> Vec<Point<T>>
+pub fn convex_hull<T>(points: impl Into<Vec<Point<T>>>) -> Vec<Point<T>>
 where
     T: NumCast + Copy + Ord,
 {
-    if points_slice.is_empty() {
-        return Vec::new();
+    let mut points = points.into();
+    if points.is_empty() {
+        return vec![];
     }
-    let mut points: Vec<Point<T>> = points_slice.to_vec();
     let mut start_point_pos = 0;
     let mut start_point = points[0];
     for (i, &point) in points.iter().enumerate().skip(1) {
@@ -320,7 +320,7 @@ mod tests {
             Point::new(140, 30),
             Point::new(120, 35),
         ];
-        let points = convex_hull(&star);
+        let points = convex_hull(star);
         assert_eq!(
             points,
             [
@@ -353,7 +353,7 @@ mod tests {
             Point::new(140, -10),
             Point::new(120, -5),
         ];
-        let points = convex_hull(&star);
+        let points = convex_hull(star);
         assert_eq!(
             points,
             [

diff --git a/src/haar.rs b/src/haar.rs
@@ -262,7 +262,7 @@ fn feature_size(feature_type: HaarFeatureType, block_size: Size<Pixels>) -> Size
 pub fn enumerate_haar_features(frame_width: u8, frame_height: u8) -> Vec<HaarFeature> {
     let frame_size = Size::new(frame_width, frame_height);
 
-    let feature_types = vec![
+    let feature_types = [
         HaarFeatureType::TwoRegionHorizontal,
         HaarFeatureType::ThreeRegionHorizontal,
         HaarFeatureType::TwoRegionVertical,

diff --git a/src/integral_image.rs b/src/integral_image.rs
@@ -106,14 +106,18 @@ where
     let out_width = in_width + 1;
     let out_height = in_height + 1;
 
-    let mut out = Image::<ChannelMap<P, T>>::new(out_width, out_height);
+    let mut out = Image::new(out_width, out_height);
 
     if in_width == 0 || in_height == 0 {
         return out;
     }
 
+    let zero = T::zero();
+    let mut sum = vec![zero; P::CHANNEL_COUNT as usize];
     for y in 0..in_height {
-        let mut sum = vec![T::zero(); P::CHANNEL_COUNT as usize];
+        sum.iter_mut().for_each(|x| {
+            *x = zero;
+        });
         for x in 0..in_width {
             // JUSTIFICATION
             //  Benefit
@@ -122,8 +126,8 @@ where
             //  Correctness
             //      x and y are within bounds by definition of in_width and in_height
             let input = unsafe { image.unsafe_get_pixel(x, y) };
-            for (s, c) in sum.iter_mut().zip(input.channels()) {
-                let pix: T = (*c).into();
+            for (s, &c) in sum.iter_mut().zip(input.channels()) {
+                let pix: T = c.into();
                 *s += if square { pix * pix } else { pix };
             }
 
@@ -138,8 +142,8 @@ where
             // pixel here we need to use the method with bounds checking
             let current = out.get_pixel_mut(x + 1, y + 1);
             // Using zip here makes this slower.
-            for c in 0..P::CHANNEL_COUNT {
-                current.channels_mut()[c as usize] = above.channels()[c as usize] + sum[c as usize];
+            for c in 0..P::CHANNEL_COUNT as usize {
+                current.channels_mut()[c] = above.channels()[c] + sum[c];
             }
         }
     }