Skip to content

Commit

Permalink
Support for NotNaN in fast fields
Browse files Browse the repository at this point in the history
  • Loading branch information
fulmicoton committed Dec 22, 2022
1 parent bb48c3e commit 540a997
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 7 deletions.
1 change: 1 addition & 0 deletions fastfield_codecs/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ fastdivide = "0.4"
log = "0.4"
itertools = { version = "0.10.3" }
measure_time = { version="0.8.2", optional=true}
ordered-float = "3.4"

[dev-dependencies]
more-asserts = "0.3.0"
Expand Down
2 changes: 2 additions & 0 deletions fastfield_codecs/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ mod column;
mod gcd;
mod serialize;

pub use ordered_float;

use self::bitpacked::BitpackedCodec;
use self::blockwise_linear::BlockwiseLinearCodec;
pub use self::column::{monotonic_map_column, Column, IterColumn, VecColumn};
Expand Down
34 changes: 34 additions & 0 deletions fastfield_codecs/src/monotonic_mapping.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use std::marker::PhantomData;

use fastdivide::DividerU64;
use ordered_float::NotNan;

use crate::MonotonicallyMappableToU128;

Expand Down Expand Up @@ -192,6 +193,8 @@ impl MonotonicallyMappableToU64 for bool {
}
}

// TODO: remove this impl.
// Tantivy should reject NaN values and work with `NotNan` internally.
impl MonotonicallyMappableToU64 for f64 {
fn to_u64(self) -> u64 {
common::f64_to_u64(self)
Expand All @@ -202,11 +205,42 @@ impl MonotonicallyMappableToU64 for f64 {
}
}

/// `u64` mapping for `f64` values wrapped in `NotNan`.
impl MonotonicallyMappableToU64 for ordered_float::NotNan<f64> {
    /// Unwraps the inner `f64` and encodes it with `common::f64_to_u64`.
    fn to_u64(self) -> u64 {
        let raw: f64 = self.into_inner();
        common::f64_to_u64(raw)
    }

    /// Decodes `val` with `common::u64_to_f64` and wraps the result in `NotNan`.
    ///
    /// # Panics
    ///
    /// Panics if `NotNan::new` rejects the decoded float (i.e. it is NaN).
    fn from_u64(val: u64) -> Self {
        let decoded: f64 = common::u64_to_f64(val);
        NotNan::new(decoded).expect("Invalid NotNaN f64 value.")
    }
}

#[cfg(test)]
mod tests {

use super::*;

#[test]
fn test_from_u64_pos_inf() {
    // Positive infinity is not NaN, so it must round-trip through the u64 encoding.
    let expected: NotNan<f64> = NotNan::new(f64::INFINITY).unwrap();
    let encoded: u64 = common::f64_to_u64(f64::INFINITY);
    let decoded: NotNan<f64> = NotNan::from_u64(encoded);
    assert_eq!(decoded, expected);
}

#[test]
fn test_from_u64_neg_inf() {
    // Negative infinity is not NaN, so it must round-trip through the u64 encoding.
    let expected: NotNan<f64> = NotNan::new(-f64::INFINITY).unwrap();
    let encoded: u64 = common::f64_to_u64(-f64::INFINITY);
    let decoded: NotNan<f64> = NotNan::from_u64(encoded);
    assert_eq!(decoded, expected);
}

#[test]
#[should_panic(expected = "Invalid NotNaN")]
fn test_from_u64_nan_panics() {
    // Decoding the u64 image of NaN is expected to trip the `expect` inside `from_u64`.
    let encoded_nan: u64 = common::f64_to_u64(f64::NAN);
    let _ = NotNan::<f64>::from_u64(encoded_nan);
}

#[test]
fn strictly_monotonic_test() {
// identity mapping
Expand Down
14 changes: 7 additions & 7 deletions fastfield_codecs/src/serialize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -197,12 +197,12 @@ pub fn serialize_u128<F: Fn() -> I, I: Iterator<Item = u128>>(
}

#[allow(dead_code)]
pub enum ValueIndexInfo {
MultiValue(Box<dyn MultiValueIndexInfo>),
SingleValue(Box<dyn SingleValueIndexInfo>),
pub enum ValueIndexInfo<'a> {
MultiValue(Box<dyn MultiValueIndexInfo + 'a>),
SingleValue(Box<dyn SingleValueIndexInfo + 'a>),
}

impl Default for ValueIndexInfo {
impl Default for ValueIndexInfo<'static> {
fn default() -> Self {
struct Dummy {}
impl SingleValueIndexInfo for Dummy {
Expand All @@ -221,7 +221,7 @@ impl Default for ValueIndexInfo {
}
}

impl ValueIndexInfo {
impl<'a> ValueIndexInfo<'a> {
fn get_cardinality(&self) -> FastFieldCardinality {
match self {
ValueIndexInfo::MultiValue(_) => FastFieldCardinality::Multi,
Expand All @@ -236,7 +236,7 @@ pub trait MultiValueIndexInfo {
/// The number of values in the column.
fn num_vals(&self) -> u32;
/// Return the start index of the values for each doc
fn iter(&self) -> Box<dyn Iterator<Item = u32>>;
fn iter(&self) -> Box<dyn Iterator<Item = u32> + '_>;
}

pub trait SingleValueIndexInfo {
Expand All @@ -245,7 +245,7 @@ pub trait SingleValueIndexInfo {
/// The number of non-null values in the column.
fn num_non_nulls(&self) -> u32;
/// Return an iterator over the positions of docs with a value
fn iter(&self) -> Box<dyn Iterator<Item = u32>>;
fn iter(&self) -> Box<dyn Iterator<Item = u32> + '_>;
}

/// Serializes u128 values with the compact space codec.
Expand Down

0 comments on commit 540a997

Please sign in to comment.