Skip to content

Commit

Permalink
Refactor integer to float conversion
Browse files Browse the repository at this point in the history
Extract some common routines to separate functions in order to
deduplicate code and remove some of the magic.
  • Loading branch information
tgross35 committed Sep 29, 2024
1 parent de74b28 commit d467e20
Show file tree
Hide file tree
Showing 3 changed files with 137 additions and 48 deletions.
173 changes: 127 additions & 46 deletions src/float/conv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,73 +6,160 @@ use super::Float;

/// Conversions from integers to floats.
///
/// These are hand-optimized bit twiddling code,
/// which unfortunately isn't the easiest kind of code to read.
/// The algorithm is explained here: <https://blog.m-ou.se/floats/>. It roughly does the following:
/// - Calculate a base mantissa by shifting the integer into mantissa position. This gives us a
/// mantissa _with the implicit bit set_!
/// - Figure out if rounding needs to occur by classifying truncated bits. Some patterns are used
/// to simplify this. Adjust the mantissa if needed.
/// - Calculate the exponent based on the base-2 logarithm of `i` (leading zeros) and subtract one.
/// - Shift the exponent and add the mantissa to create the final representation. Subtracting one
/// from the exponent (above) accounts for the implicit bit being set in the mantissa.
///
/// The algorithm is explained here: <https://blog.m-ou.se/floats/>
/// # Terminology
///
/// - `i`: the original integer
/// - `i_m`: the integer, shifted fully left (no leading zeros)
/// - `n`: number of leading zeroes
/// - `e`: the resulting exponent. Usually 1 is subtracted to offset the mantissa implicit bit.
/// - `m`: the resulting mantissa
/// - `m_base`: the mantissa before adjusting for truncated bits. Implicit bit is usually set.
/// - `adj`: the bits that will be truncated, possibly compressed in some way.
mod int_to_float {
use super::*;

/// Compute the biased exponent for an integer of type `I` that has `n` leading zeros.
///
/// The value returned is `F::EXPONENT_BIAS - 1 + I::BITS - n`. Callers usually subtract 1 from
/// it so that a mantissa which still carries its implicit bit can be added back later.
fn exp<I: Int, F: Float<Int: CastFrom<u32>>>(n: u32) -> F::Int {
    // Biased exponent of an integer with no leading zeros (`n == 0`).
    let unshifted = F::EXPONENT_BIAS - 1 + I::BITS;
    F::Int::cast_from(unshifted - n)
}

/// Round a truncated mantissa to nearest, breaking ties to even.
///
/// `dropped_bits` should hold exactly the bits that get truncated, left-aligned so that the
/// most significant dropped bit occupies the top bit of `F::Int`. Returns `m_base`, plus one
/// when rounding up is required.
fn m_adj<F: Float>(m_base: F::Int, dropped_bits: F::Int) -> F::Int {
    let top = F::BITS - 1;

    // The most significant dropped bit, kept only when the lowest bit of `m_base` is 0.
    let half_if_even = (dropped_bits >> top) & !m_base;

    // Subtracting it clears the top bit only for an exact tie on an even mantissa; in every
    // other case the top dropped bit survives unchanged. The surviving top bit is the
    // branchless `0`/`1` round-up amount.
    let round_up = (dropped_bits - half_if_even) >> top;

    m_base + round_up
}

/// Assemble a float's bit pattern from an exponent and a mantissa.
///
/// The exponent is shifted left by `F::SIGNIFICAND_BITS` and the mantissa is added to it. When
/// the mantissa still has its implicit bit set, pass an exponent that is one less than the
/// actual value so the addition cancels it out.
fn repr<F: Float>(e: F::Int, m: F::Int) -> F::Int {
    let e_shifted = e << F::SIGNIFICAND_BITS;
    // Addition rather than bitwise OR, so the mantissa can overflow into the exponent
    e_shifted + m
}

/// Shift distance that takes a left-aligned integer of type `I` to the mantissa position of a
/// narrower float `F`: the width difference plus the float's exponent bits.
fn shift_f_lt_i<I: Int, F: Float>() -> u32 {
    let width_diff = I::BITS - F::BITS;
    width_diff + F::EXPONENT_BITS
}

/// Shift distance that takes an integer of type `I` with `n` leading zeros to the mantissa
/// position of a wider float `F`.
fn shift_f_gt_i<I: Int, F: Float>(n: u32) -> u32 {
    // Distance for an integer with no leading zeros; each leading zero adds one more.
    let aligned = F::SIGNIFICAND_BITS - I::BITS + 1;
    aligned + n
}

/// Convert a signed integer by running an unsigned conversion on its magnitude and then
/// OR-ing the sign bit into the result.
///
/// `conv` receives `i.unsigned_abs()` and must return the float's bit representation.
pub fn signed<I, F, Conv>(i: I, conv: Conv) -> F
where
    F: Float,
    I: Int,
    F::Int: CastFrom<I>,
    Conv: Fn(I::UnsignedInt) -> F::Int,
{
    // Bring the integer's top bit down to bit 0, then move it up to the float's top bit. The
    // left shift by `F::BITS - 1` discards everything except the lowest bit of the cast value.
    let sign_fill = i >> (I::BITS - 1);
    let sign_bit = F::Int::cast_from(sign_fill) << (F::BITS - 1);

    let magnitude_bits = conv(i.unsigned_abs());
    F::from_repr(magnitude_bits | sign_bit)
}

/// Convert a `u32` to the bit representation of an `f32`, rounding to nearest with ties to
/// even. Zero is special-cased and returns all-zero bits.
///
/// NOTE(review): this span is taken from a rendered diff; the magic-number body and the
/// helper-based body appear back to back. Confirm which lines are live before editing.
pub fn u32_to_f32_bits(i: u32) -> u32 {
if i == 0 {
return 0;
}
let n = i.leading_zeros();
let a = (i << n) >> 8; // Significant bits, with bit 24 still intact.
let b = (i << n) << 24; // Insignificant bits, only relevant for rounding.
let m = a + ((b - (b >> 31 & !a)) >> 31); // Add one when we need to round up. Break ties to even.
let e = 157 - n; // Exponent plus 127, minus one.
(e << 23) + m // + not |, so the mantissa can overflow into the exponent.
// Mantissa with implicit bit set
let m_base = (i << n) >> f32::EXPONENT_BITS;
// Bits that will be dropped
let adj = (i << n) << (f32::SIGNIFICAND_BITS + 1);
let m = m_adj::<f32>(m_base, adj);
// One less than the real exponent, because `m` still carries the implicit bit
let e = exp::<u32, f32>(n) - 1;
repr::<f32>(e, m)
}

/// Convert a `u32` to the bit representation of an `f64`. Zero is special-cased and returns
/// all-zero bits. No rounding adjustment is applied: the integer is only shifted left into
/// the mantissa.
///
/// NOTE(review): this span is taken from a rendered diff; the magic-number body and the
/// helper-based body appear back to back. Confirm which lines are live before editing.
pub fn u32_to_f64_bits(i: u32) -> u64 {
if i == 0 {
return 0;
}
let n = i.leading_zeros();
let m = (i as u64) << (21 + n); // Significant bits, with bit 53 still intact.
let e = 1053 - n as u64; // Exponent plus 1023, minus one.
(e << 52) + m // Bit 53 of m will overflow into e.
// Mantissa with implicit bit set
let m = (i as u64) << shift_f_gt_i::<u32, f64>(n);
// One less than the real exponent, because `m` still carries the implicit bit
let e = exp::<u32, f64>(n) - 1;
repr::<f64>(e, m)
}

/// Convert a `u64` to the bit representation of an `f32`, rounding to nearest with ties to
/// even. There is no early return for zero; the exponent is forced to 0 when `i == 0`.
///
/// NOTE(review): this span is taken from a rendered diff; the magic-number body and the
/// helper-based body appear back to back. Confirm which lines are live before editing.
pub fn u64_to_f32_bits(i: u64) -> u32 {
let n = i.leading_zeros();
// `wrapping_shl` because `n` equals the bit width when `i == 0`
let y = i.wrapping_shl(n);
let a = (y >> 40) as u32; // Significant bits, with bit 24 still intact.
let b = (y >> 8 | y & 0xFFFF) as u32; // Insignificant bits, only relevant for rounding.
let m = a + ((b - (b >> 31 & !a)) >> 31); // Add one when we need to round up. Break ties to even.
let e = if i == 0 { 0 } else { 189 - n }; // Exponent plus 127, minus one, except for zero.
(e << 23) + m // + not |, so the mantissa can overflow into the exponent.
// `wrapping_shl` because `n` equals the bit width when `i == 0`
let i_m = i.wrapping_shl(n);
// Mantissa with implicit bit set
let m_base: u32 = (i_m >> shift_f_lt_i::<u64, f32>()) as u32;
// The entire lower half of `i` will be truncated (masked portion), plus the
// next `EXPONENT_BITS` bits.
let adj = (i_m >> f32::EXPONENT_BITS | i_m & 0xFFFF) as u32;
let m = m_adj::<f32>(m_base, adj);
let e = if i == 0 { 0 } else { exp::<u64, f32>(n) - 1 };
repr::<f32>(e, m)
}

/// Convert a `u64` to the bit representation of an `f64`, rounding to nearest with ties to
/// even. Zero is special-cased and returns all-zero bits.
///
/// NOTE(review): this span is taken from a rendered diff; the magic-number body and the
/// helper-based body appear back to back. Confirm which lines are live before editing.
pub fn u64_to_f64_bits(i: u64) -> u64 {
if i == 0 {
return 0;
}
let n = i.leading_zeros();
let a = (i << n) >> 11; // Significant bits, with bit 53 still intact.
let b = (i << n) << 53; // Insignificant bits, only relevant for rounding.
let m = a + ((b - (b >> 63 & !a)) >> 63); // Add one when we need to round up. Break ties to even.
let e = 1085 - n as u64; // Exponent plus 1023, minus one.
(e << 52) + m // + not |, so the mantissa can overflow into the exponent.
// Mantissa with implicit bit set
let m_base = (i << n) >> f64::EXPONENT_BITS;
// Bits that will be dropped, left-aligned
let adj = (i << n) << (f64::SIGNIFICAND_BITS + 1);
let m = m_adj::<f64>(m_base, adj);
// One less than the real exponent, because `m` still carries the implicit bit
let e = exp::<u64, f64>(n) - 1;
repr::<f64>(e, m)
}

/// Convert a `u128` to the bit representation of an `f32`, rounding to nearest with ties to
/// even. There is no early return for zero; the exponent is forced to 0 when `i == 0`.
///
/// NOTE(review): this span is taken from a rendered diff; the magic-number body and the
/// helper-based body appear back to back. Confirm which lines are live before editing.
pub fn u128_to_f32_bits(i: u128) -> u32 {
let n = i.leading_zeros();
// `wrapping_shl` because `n` equals the bit width when `i == 0`
let y = i.wrapping_shl(n);
let a = (y >> 104) as u32; // Significant bits, with bit 24 still intact.
let b = (y >> 72) as u32 | ((y << 32) >> 32 != 0) as u32; // Insignificant bits, only relevant for rounding.
let m = a + ((b - (b >> 31 & !a)) >> 31); // Add one when we need to round up. Break ties to even.
let e = if i == 0 { 0 } else { 253 - n }; // Exponent plus 127, minus one, except for zero.
(e << 23) + m // + not |, so the mantissa can overflow into the exponent.
let i_m = i.wrapping_shl(n); // Mantissa, shifted so the first bit is nonzero
let m_base: u32 = (i_m >> shift_f_lt_i::<u128, f32>()) as u32;

// Within the upper `F::BITS`, everything except for the significand
// gets truncated
let d1: u32 = (i_m >> (u128::BITS - f32::BITS - f32::SIGNIFICAND_BITS - 1)).cast();

// The entire rest of `i_m` gets truncated. Zero the upper `F::BITS` then just
// check if it is nonzero.
let d2: u32 = (i_m << f32::BITS >> f32::BITS != 0).into();
// `d2` contributes only the lowest bit: a flag that some lower bit was set
let adj = d1 | d2;

// Mantissa with implicit bit set
let m = m_adj::<f32>(m_base, adj);
let e = if i == 0 { 0 } else { exp::<u128, f32>(n) - 1 };
repr::<f32>(e, m)
}

/// Convert a `u128` to the bit representation of an `f64`, rounding to nearest with ties to
/// even. There is no early return for zero; the exponent is forced to 0 when `i == 0`.
///
/// NOTE(review): this span is taken from a rendered diff; the magic-number body and the
/// helper-based body appear back to back. Confirm which lines are live before editing.
pub fn u128_to_f64_bits(i: u128) -> u64 {
let n = i.leading_zeros();
// `wrapping_shl` because `n` equals the bit width when `i == 0`
let y = i.wrapping_shl(n);
let a = (y >> 75) as u64; // Significant bits, with bit 53 still intact.
let b = (y >> 11 | y & 0xFFFF_FFFF) as u64; // Insignificant bits, only relevant for rounding.
let m = a + ((b - (b >> 63 & !a)) >> 63); // Add one when we need to round up. Break ties to even.
let e = if i == 0 { 0 } else { 1149 - n as u64 }; // Exponent plus 1023, minus one, except for zero.
(e << 52) + m // + not |, so the mantissa can overflow into the exponent.
// `wrapping_shl` because `n` equals the bit width when `i == 0`
let i_m = i.wrapping_shl(n);
// Mantissa with implicit bit set
let m_base: u64 = (i_m >> shift_f_lt_i::<u128, f64>()) as u64;
// The entire lower half of `i` will be truncated (masked portion), plus the
// next `EXPONENT_BITS` bits.
let adj = (i_m >> f64::EXPONENT_BITS | i_m & 0xFFFF_FFFF) as u64;
let m = m_adj::<f64>(m_base, adj);
let e = if i == 0 { 0 } else { exp::<u128, f64>(n) - 1 };
repr::<f64>(e, m)
}
}

Expand Down Expand Up @@ -113,38 +200,32 @@ intrinsics! {
// Signed integer to float conversion intrinsics. Each one converts the magnitude with the
// matching unsigned `int_to_float::*_bits` routine and then applies the sign bit.
//
// NOTE(review): this span is taken from a rendered diff; in every function the manual
// `sign_bit` lines and the `int_to_float::signed` call appear back to back. Confirm which
// lines are live before editing.
intrinsics! {
// `i32` -> `f32`
#[arm_aeabi_alias = __aeabi_i2f]
pub extern "C" fn __floatsisf(i: i32) -> f32 {
let sign_bit = ((i >> 31) as u32) << 31;
f32::from_bits(int_to_float::u32_to_f32_bits(i.unsigned_abs()) | sign_bit)
int_to_float::signed(i, int_to_float::u32_to_f32_bits)
}

// `i32` -> `f64`
#[arm_aeabi_alias = __aeabi_i2d]
pub extern "C" fn __floatsidf(i: i32) -> f64 {
let sign_bit = ((i >> 31) as u64) << 63;
f64::from_bits(int_to_float::u32_to_f64_bits(i.unsigned_abs()) | sign_bit)
int_to_float::signed(i, int_to_float::u32_to_f64_bits)
}

// `i64` -> `f32`
#[arm_aeabi_alias = __aeabi_l2f]
pub extern "C" fn __floatdisf(i: i64) -> f32 {
let sign_bit = ((i >> 63) as u32) << 31;
f32::from_bits(int_to_float::u64_to_f32_bits(i.unsigned_abs()) | sign_bit)
int_to_float::signed(i, int_to_float::u64_to_f32_bits)
}

// `i64` -> `f64`
#[arm_aeabi_alias = __aeabi_l2d]
pub extern "C" fn __floatdidf(i: i64) -> f64 {
let sign_bit = ((i >> 63) as u64) << 63;
f64::from_bits(int_to_float::u64_to_f64_bits(i.unsigned_abs()) | sign_bit)
int_to_float::signed(i, int_to_float::u64_to_f64_bits)
}

// `i128` -> `f32`
#[cfg_attr(target_os = "uefi", unadjusted_on_win64)]
pub extern "C" fn __floattisf(i: i128) -> f32 {
let sign_bit = ((i >> 127) as u32) << 31;
f32::from_bits(int_to_float::u128_to_f32_bits(i.unsigned_abs()) | sign_bit)
int_to_float::signed(i, int_to_float::u128_to_f32_bits)
}

// `i128` -> `f64`
#[cfg_attr(target_os = "uefi", unadjusted_on_win64)]
pub extern "C" fn __floattidf(i: i128) -> f64 {
let sign_bit = ((i >> 127) as u64) << 63;
f64::from_bits(int_to_float::u128_to_f64_bits(i.unsigned_abs()) | sign_bit)
int_to_float::signed(i, int_to_float::u128_to_f64_bits)
}
}

Expand Down
10 changes: 9 additions & 1 deletion src/int/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ pub(crate) trait Int: MinInt

fn unsigned(self) -> Self::UnsignedInt;
fn from_unsigned(unsigned: Self::UnsignedInt) -> Self;
fn unsigned_abs(self) -> Self::UnsignedInt;

fn from_bool(b: bool) -> Self;

Expand Down Expand Up @@ -178,7 +179,6 @@ macro_rules! int_impl_common {
fn wrapping_mul(self, other: Self) -> Self {
<Self>::wrapping_mul(self, other)
}

fn wrapping_sub(self, other: Self) -> Self {
<Self>::wrapping_sub(self, other)
}
Expand Down Expand Up @@ -235,6 +235,10 @@ macro_rules! int_impl {
me
}

// Returns the value unchanged. Presumably this is the impl for unsigned types, where the
// value is already its own absolute value (return type is `Self`) — confirm against the
// enclosing macro arm.
fn unsigned_abs(self) -> Self {
self
}

fn abs_diff(self, other: Self) -> Self {
if self < other {
other.wrapping_sub(self)
Expand Down Expand Up @@ -268,6 +272,10 @@ macro_rules! int_impl {
me as $ity
}

// Absolute value as the unsigned counterpart type.
// NOTE(review): this looks self-recursive, but presumably resolves to the primitive's
// inherent `unsigned_abs`, which takes precedence over the trait method — confirm.
fn unsigned_abs(self) -> Self::UnsignedInt {
self.unsigned_abs()
}

fn abs_diff(self, other: Self) -> $uty {
self.wrapping_sub(other).wrapping_abs() as $uty
}
Expand Down
2 changes: 1 addition & 1 deletion testcrate/tests/conv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use compiler_builtins::float::Float;
use rustc_apfloat::{Float as _, FloatConvert as _};
use testcrate::*;

mod int_to_float {
mod i_to_f {
use super::*;

macro_rules! i_to_f {
Expand Down

0 comments on commit d467e20

Please sign in to comment.