From 7385a8f80cdd22511c1b58f3311879d9bb07bbc8 Mon Sep 17 00:00:00 2001 From: Paul Masurel Date: Thu, 22 Dec 2022 17:47:25 +0900 Subject: [PATCH] Supporting PartialCmp in VectorColumn. (#1735) * Supporting PartialCmp in VectorColumn. * Apply suggestions from code review Co-authored-by: PSeitz --- bitpacker/src/lib.rs | 122 +++++++++++++++++++++++++-------- fastfield_codecs/src/column.rs | 2 +- 2 files changed, 93 insertions(+), 31 deletions(-) diff --git a/bitpacker/src/lib.rs b/bitpacker/src/lib.rs index 07e3de4617..b436f45ddf 100644 --- a/bitpacker/src/lib.rs +++ b/bitpacker/src/lib.rs @@ -1,6 +1,8 @@ mod bitpacker; mod blocked_bitpacker; +use std::cmp::Ordering; + pub use crate::bitpacker::{BitPacker, BitUnpacker}; pub use crate::blocked_bitpacker::BlockedBitpacker; @@ -37,44 +39,104 @@ pub fn compute_num_bits(n: u64) -> u8 { } } +/// Computes the (min, max) of an iterator of `PartialOrd` values. +/// +/// For values implementing `Ord` (in a way consistent to their `PartialOrd` impl), +/// this function behaves as expected. +/// +/// For values with partial ordering, the behavior is non-trivial and may +/// depends on the order of the values. +/// For floats however, it simply returns the same results as if NaN were +/// skipped. pub fn minmax(mut vals: I) -> Option<(T, T)> where I: Iterator, - T: Copy + Ord, + T: Copy + PartialOrd, { - if let Some(first_el) = vals.next() { - return Some(vals.fold((first_el, first_el), |(min_val, max_val), el| { - (min_val.min(el), max_val.max(el)) - })); + let first_el = vals.find(|val| { + // We use this to make sure we skip all NaN values when + // working with a float type. + val.partial_cmp(val) == Some(Ordering::Equal) + })?; + let mut min_so_far: T = first_el; + let mut max_so_far: T = first_el; + for val in vals { + if val.partial_cmp(&min_so_far) == Some(Ordering::Less) { + min_so_far = val; + } + if val.partial_cmp(&max_so_far) == Some(Ordering::Greater) { + max_so_far = val; + } } - None + Some((min_so_far, max_so_far)) } -#[test] -fn test_compute_num_bits() { - assert_eq!(compute_num_bits(1), 1u8); - assert_eq!(compute_num_bits(0), 0u8); - assert_eq!(compute_num_bits(2), 2u8); - assert_eq!(compute_num_bits(3), 2u8); - assert_eq!(compute_num_bits(4), 3u8); - assert_eq!(compute_num_bits(255), 8u8); - assert_eq!(compute_num_bits(256), 9u8); - assert_eq!(compute_num_bits(5_000_000_000), 33u8); -} +#[cfg(test)] +mod tests { + use super::*; -#[test] -fn test_minmax_empty() { - let vals: Vec = vec![]; - assert_eq!(minmax(vals.into_iter()), None); -} + #[test] + fn test_compute_num_bits() { + assert_eq!(compute_num_bits(1), 1u8); + assert_eq!(compute_num_bits(0), 0u8); + assert_eq!(compute_num_bits(2), 2u8); + assert_eq!(compute_num_bits(3), 2u8); + assert_eq!(compute_num_bits(4), 3u8); + assert_eq!(compute_num_bits(255), 8u8); + assert_eq!(compute_num_bits(256), 9u8); + assert_eq!(compute_num_bits(5_000_000_000), 33u8); + } -#[test] -fn test_minmax_one() { - assert_eq!(minmax(vec![1].into_iter()), Some((1, 1))); -} + #[test] + fn test_minmax_empty() { + let vals: Vec = vec![]; + assert_eq!(minmax(vals.into_iter()), None); + } -#[test] -fn test_minmax_two() { - assert_eq!(minmax(vec![1, 2].into_iter()), Some((1, 2))); - assert_eq!(minmax(vec![2, 1].into_iter()), Some((1, 2))); + #[test] + fn test_minmax_one() { + assert_eq!(minmax(vec![1].into_iter()), Some((1, 1))); + } + + #[test] + fn test_minmax_two() { + assert_eq!(minmax(vec![1, 2].into_iter()), Some((1, 2))); + assert_eq!(minmax(vec![2, 1].into_iter()), Some((1, 2))); + } + + #[test] + fn test_minmax_nan() { + assert_eq!( + minmax(vec![f64::NAN, 1f64, 2f64].into_iter()), + Some((1f64, 2f64)) + ); + assert_eq!( + minmax(vec![2f64, f64::NAN, 1f64].into_iter()), + Some((1f64, 2f64)) + ); + assert_eq!( + minmax(vec![2f64, 1f64, f64::NAN].into_iter()), + Some((1f64, 2f64)) + ); + } + + #[test] + fn test_minmax_inf() { + assert_eq!( + minmax(vec![f64::INFINITY, 1f64, 2f64].into_iter()), + Some((1f64, f64::INFINITY)) + ); + assert_eq!( + minmax(vec![-f64::INFINITY, 1f64, 2f64].into_iter()), + Some((-f64::INFINITY, 2f64)) + ); + assert_eq!( + minmax(vec![2f64, f64::INFINITY, 1f64].into_iter()), + Some((1f64, f64::INFINITY)) + ); + assert_eq!( + minmax(vec![2f64, 1f64, -f64::INFINITY].into_iter()), + Some((-f64::INFINITY, 2f64)) + ); + } } diff --git a/fastfield_codecs/src/column.rs b/fastfield_codecs/src/column.rs index 52a6a18418..33fee8af3f 100644 --- a/fastfield_codecs/src/column.rs +++ b/fastfield_codecs/src/column.rs @@ -135,7 +135,7 @@ impl<'a, T: Copy + PartialOrd + Send + Sync> Column for VecColumn<'a, T> { } } -impl<'a, T: Copy + Ord + Default, V> From<&'a V> for VecColumn<'a, T> +impl<'a, T: Copy + PartialOrd + Default, V> From<&'a V> for VecColumn<'a, T> where V: AsRef<[T]> + ?Sized { fn from(values: &'a V) -> Self {