diff --git a/src/float/conv.rs b/src/float/conv.rs
index d275f982b..6525303e4 100644
--- a/src/float/conv.rs
+++ b/src/float/conv.rs
@@ -6,21 +6,90 @@ use super::Float;
/// Conversions from integers to floats.
///
-/// These are hand-optimized bit twiddling code,
-/// which unfortunately isn't the easiest kind of code to read.
+/// The algorithm is explained here: <https://blog.m-ou.se/floats/>. It roughly does the
+/// following (a worked example follows the terminology section below):
+/// - Calculate a base mantissa by shifting the integer into mantissa position. This gives us a
+/// mantissa _with the implicit bit set_!
+/// - Figure out if rounding needs to occur by classifying truncated bits. Some patterns are used
+/// to simplify this. Adjust the mantissa if needed.
+/// - Calculate the exponent based on the base-2 logarithm of `i` (leading zeros) and subtract one.
+/// - Shift the exponent and add the mantissa to create the final representation. Subtracting one
+/// from the exponent (above) accounts for the explicit bit being set in the mantissa.
///
-/// The algorithm is explained here: <https://blog.m-ou.se/floats/>
+/// # Terminology
+///
+/// - `i`: the original integer
+/// - `i_m`: the integer, shifted fully left (no leading zeros)
+/// - `n`: number of leading zeroes
+/// - `e`: the resulting exponent. Usually 1 is subtracted to offset the mantissa implicit bit.
+/// - `m`: the resulting mantissa
+/// - `m_base`: the mantissa before adjusting for truncated bits. Implicit bit is usually set.
+/// - `adj`: the bits that will be truncated, possibly compressed in some way.
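+///
+/// For concreteness, here is roughly how the `u32 -> f32` path handles `i = 3` (a sketch using
+/// the names above; the module is private, so this is illustrative rather than a doctest):
+///
+/// ```ignore
+/// let n = 3u32.leading_zeros();   // 30
+/// let i_m = 3u32 << n;            // 0xC000_0000
+/// let m_base = i_m >> 8;          // 0x00C0_0000, implicit bit 23 set
+/// let adj = i_m << 24;            // 0: nothing dropped, so no rounding
+/// let e = (127 - 1 + 32 - n) - 1; // 127
+/// assert_eq!((e << 23) + m_base, 3.0f32.to_bits()); // 0x4040_0000
+/// ```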
mod int_to_float {
+ use super::*;
+
+ /// Calculate the exponent from the number of leading zeros.
+ ///
+ /// Usually 1 is subtracted from this function's result, so that a mantissa with the implicit
+ /// bit set can be added back later.
+ fn exp<I: Int, F: Float<Int: CastFrom<u32>>>(n: u32) -> F::Int {
+ F::Int::cast_from(F::EXPONENT_BIAS - 1 + I::BITS - n)
+ }
+
+ /// Adjust a mantissa with dropped bits to perform correct rounding.
+ ///
+ /// The dropped bits should be exactly the bits that get truncated (left-aligned). They may be
+ /// compressed, since only the top (rounding) bit and whether anything below it is nonzero matter.
+ fn m_adj<F: Float>(m_base: F::Int, dropped_bits: F::Int) -> F::Int {
+ // Branchlessly extract a `1` if rounding up should happen, 0 otherwise
+ // This accounts for rounding to even.
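+ // `dropped_bits >> (F::BITS - 1)` is 1 only when the highest dropped (rounding) bit is set,
+ // and `& !m_base` keeps that 1 only when `m_base` is even. Subtracting it clears the top bit
+ // of `dropped_bits` exactly in the "tie, mantissa already even" case, so the final shift
+ // yields 1 when we are above the halfway point, or at a tie with an odd mantissa.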
+ let adj = (dropped_bits - (dropped_bits >> (F::BITS - 1) & !m_base)) >> (F::BITS - 1);
+
+ // Add one when we need to round up. Break ties to even.
+ m_base + adj
+ }
+
+ /// Shift the exponent to its position and add the mantissa.
+ ///
+ /// If the mantissa has the implicit bit set, the exponent should be one less than its actual
+ /// value to cancel it out.
+ fn repr<F: Float>(e: F::Int, m: F::Int) -> F::Int {
+ // + rather than | so the mantissa can overflow into the exponent
+ (e << F::SIGNIFICAND_BITS) + m
+ }
+
+ /// Shift distance for a left-aligned integer to a smaller float.
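+ /// For example, `u64` -> `f32` gives `(64 - 32) + 8 = 40`, matching the `y >> 40` used by the
+ /// old hand-written code.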
+ fn shift_f_lt_i<I: Int, F: Float>() -> u32 {
+ (I::BITS - F::BITS) + F::EXPONENT_BITS
+ }
+
+ /// Shift distance for an integer with `n` leading zeros to a smaller float.
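+ /// For example, `u32` -> `f64` gives `52 - 32 + 1 + n = 21 + n`, matching the `21 + n` shift
+ /// in the old hand-written code.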
+ fn shift_f_gt_i<I: Int, F: Float>(n: u32) -> u32 {
+ F::SIGNIFICAND_BITS - I::BITS + 1 + n
+ }
+
+ /// Perform a signed operation as unsigned, then add the sign back.
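+ /// For example, `__floatsisf` below boils down to `signed(i, u32_to_f32_bits)`.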
+ pub fn signed<I, F, Conv>(i: I, conv: Conv) -> F
+ where
+ F: Float,
+ I: Int,
+ F::Int: CastFrom<I>,
+ Conv: Fn(I::UnsignedInt) -> F::Int,
+ {
+ let sign_bit = F::Int::cast_from(i >> (I::BITS - 1)) << (F::BITS - 1);
+ F::from_repr(conv(i.unsigned_abs()) | sign_bit)
+ }
+
pub fn u32_to_f32_bits(i: u32) -> u32 {
if i == 0 {
return 0;
}
let n = i.leading_zeros();
- let a = (i << n) >> 8; // Significant bits, with bit 24 still in tact.
- let b = (i << n) << 24; // Insignificant bits, only relevant for rounding.
- let m = a + ((b - (b >> 31 & !a)) >> 31); // Add one when we need to round up. Break ties to even.
- let e = 157 - n; // Exponent plus 127, minus one.
- (e << 23) + m // + not |, so the mantissa can overflow into the exponent.
+ // Mantissa with implicit bit set
+ let m_base = (i << n) >> f32::EXPONENT_BITS;
+ // Bits that will be dropped
+ let adj = (i << n) << (f32::SIGNIFICAND_BITS + 1);
+ let m = m_adj::<f32>(m_base, adj);
+ let e = exp::<u32, f32>(n) - 1;
+ repr::<f32>(e, m)
}
pub fn u32_to_f64_bits(i: u32) -> u64 {
@@ -28,19 +97,23 @@ mod int_to_float {
return 0;
}
let n = i.leading_zeros();
- let m = (i as u64) << (21 + n); // Significant bits, with bit 53 still in tact.
- let e = 1053 - n as u64; // Exponent plus 1023, minus one.
- (e << 52) + m // Bit 53 of m will overflow into e.
+ // Mantissa with implicit bit set
+ let m = (i as u64) << shift_f_gt_i::<u32, f64>(n);
+ let e = exp::<u32, f64>(n) - 1;
+ repr::<f64>(e, m)
}
pub fn u64_to_f32_bits(i: u64) -> u32 {
let n = i.leading_zeros();
- let y = i.wrapping_shl(n);
- let a = (y >> 40) as u32; // Significant bits, with bit 24 still in tact.
- let b = (y >> 8 | y & 0xFFFF) as u32; // Insignificant bits, only relevant for rounding.
- let m = a + ((b - (b >> 31 & !a)) >> 31); // Add one when we need to round up. Break ties to even.
- let e = if i == 0 { 0 } else { 189 - n }; // Exponent plus 127, minus one, except for zero.
- (e << 23) + m // + not |, so the mantissa can overflow into the exponent.
+ let i_m = i.wrapping_shl(n);
+ // Mantissa with implicit bit set
+ let m_base: u32 = (i_m >> shift_f_lt_i::<u64, f32>()) as u32;
+ // The entire lower half of `i` will be truncated (masked portion), plus the
+ // next `EXPONENT_BITS` bits.
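+ // After the shift, the rounding bit (highest truncated bit of `i_m`) sits in the top bit of
+ // `adj`, and OR-ing the low 16 bits back in keeps the bits that fell below the `u32` window
+ // from being lost; that is all `m_adj` needs.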
+ let adj = (i_m >> f32::EXPONENT_BITS | i_m & 0xFFFF) as u32;
+ let m = m_adj::<f32>(m_base, adj);
+ let e = if i == 0 { 0 } else { exp::<u64, f32>(n) - 1 };
+ repr::<f32>(e, m)
}
pub fn u64_to_f64_bits(i: u64) -> u64 {
@@ -48,31 +121,45 @@ mod int_to_float {
return 0;
}
let n = i.leading_zeros();
- let a = (i << n) >> 11; // Significant bits, with bit 53 still in tact.
- let b = (i << n) << 53; // Insignificant bits, only relevant for rounding.
- let m = a + ((b - (b >> 63 & !a)) >> 63); // Add one when we need to round up. Break ties to even.
- let e = 1085 - n as u64; // Exponent plus 1023, minus one.
- (e << 52) + m // + not |, so the mantissa can overflow into the exponent.
+ // Mantissa with implicit bit set
+ let m_base = (i << n) >> f64::EXPONENT_BITS;
+ let adj = (i << n) << (f64::SIGNIFICAND_BITS + 1);
+ let m = m_adj::<f64>(m_base, adj);
+ let e = exp::<u64, f64>(n) - 1;
+ repr::<f64>(e, m)
}
pub fn u128_to_f32_bits(i: u128) -> u32 {
let n = i.leading_zeros();
- let y = i.wrapping_shl(n);
- let a = (y >> 104) as u32; // Significant bits, with bit 24 still in tact.
- let b = (y >> 72) as u32 | ((y << 32) >> 32 != 0) as u32; // Insignificant bits, only relevant for rounding.
- let m = a + ((b - (b >> 31 & !a)) >> 31); // Add one when we need to round up. Break ties to even.
- let e = if i == 0 { 0 } else { 253 - n }; // Exponent plus 127, minus one, except for zero.
- (e << 23) + m // + not |, so the mantissa can overflow into the exponent.
+ let i_m = i.wrapping_shl(n); // Mantissa, shifted so the first bit is nonzero
+ let m_base: u32 = (i_m >> shift_f_lt_i::<u128, f32>()) as u32;
+
+ // Within the upper `F::BITS`, everything except for the significand
+ // gets truncated
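+ // (the highest of them, the rounding bit, becomes the top bit of `d1`)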
+ let d1: u32 = (i_m >> (u128::BITS - f32::BITS - f32::SIGNIFICAND_BITS - 1)).cast();
+
+ // The entire rest of `i_m` gets truncated. Zero the upper `F::BITS` then just
+ // check if it is nonzero.
+ let d2: u32 = (i_m << f32::BITS >> f32::BITS != 0).into();
+ let adj = d1 | d2;
+
+ // Mantissa with implicit bit set
+ let m = m_adj::<f32>(m_base, adj);
+ let e = if i == 0 { 0 } else { exp::<u128, f32>(n) - 1 };
+ repr::<f32>(e, m)
}
pub fn u128_to_f64_bits(i: u128) -> u64 {
let n = i.leading_zeros();
- let y = i.wrapping_shl(n);
- let a = (y >> 75) as u64; // Significant bits, with bit 53 still in tact.
- let b = (y >> 11 | y & 0xFFFF_FFFF) as u64; // Insignificant bits, only relevant for rounding.
- let m = a + ((b - (b >> 63 & !a)) >> 63); // Add one when we need to round up. Break ties to even.
- let e = if i == 0 { 0 } else { 1149 - n as u64 }; // Exponent plus 1023, minus one, except for zero.
- (e << 52) + m // + not |, so the mantissa can overflow into the exponent.
+ let i_m = i.wrapping_shl(n);
+ // Mantissa with implicit bit set
+ let m_base: u64 = (i_m >> shift_f_lt_i::<u128, f64>()) as u64;
+ // The entire lower half of `i` will be truncated (masked portion), plus the
+ // next `EXPONENT_BITS` bits.
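+ // Same trick as in `u64_to_f32_bits`: the rounding bit becomes the top bit of `adj`, and the
+ // OR preserves the truncated bits that fall below the shifted `u64` window.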
+ let adj = (i_m >> f64::EXPONENT_BITS | i_m & 0xFFFF_FFFF) as u64;
+ let m = m_adj::<f64>(m_base, adj);
+ let e = if i == 0 { 0 } else { exp::<u128, f64>(n) - 1 };
+ repr::<f64>(e, m)
}
}
@@ -113,38 +200,32 @@ intrinsics! {
intrinsics! {
#[arm_aeabi_alias = __aeabi_i2f]
pub extern "C" fn __floatsisf(i: i32) -> f32 {
- let sign_bit = ((i >> 31) as u32) << 31;
- f32::from_bits(int_to_float::u32_to_f32_bits(i.unsigned_abs()) | sign_bit)
+ int_to_float::signed(i, int_to_float::u32_to_f32_bits)
}
#[arm_aeabi_alias = __aeabi_i2d]
pub extern "C" fn __floatsidf(i: i32) -> f64 {
- let sign_bit = ((i >> 31) as u64) << 63;
- f64::from_bits(int_to_float::u32_to_f64_bits(i.unsigned_abs()) | sign_bit)
+ int_to_float::signed(i, int_to_float::u32_to_f64_bits)
}
#[arm_aeabi_alias = __aeabi_l2f]
pub extern "C" fn __floatdisf(i: i64) -> f32 {
- let sign_bit = ((i >> 63) as u32) << 31;
- f32::from_bits(int_to_float::u64_to_f32_bits(i.unsigned_abs()) | sign_bit)
+ int_to_float::signed(i, int_to_float::u64_to_f32_bits)
}
#[arm_aeabi_alias = __aeabi_l2d]
pub extern "C" fn __floatdidf(i: i64) -> f64 {
- let sign_bit = ((i >> 63) as u64) << 63;
- f64::from_bits(int_to_float::u64_to_f64_bits(i.unsigned_abs()) | sign_bit)
+ int_to_float::signed(i, int_to_float::u64_to_f64_bits)
}
#[cfg_attr(target_os = "uefi", unadjusted_on_win64)]
pub extern "C" fn __floattisf(i: i128) -> f32 {
- let sign_bit = ((i >> 127) as u32) << 31;
- f32::from_bits(int_to_float::u128_to_f32_bits(i.unsigned_abs()) | sign_bit)
+ int_to_float::signed(i, int_to_float::u128_to_f32_bits)
}
#[cfg_attr(target_os = "uefi", unadjusted_on_win64)]
pub extern "C" fn __floattidf(i: i128) -> f64 {
- let sign_bit = ((i >> 127) as u64) << 63;
- f64::from_bits(int_to_float::u128_to_f64_bits(i.unsigned_abs()) | sign_bit)
+ int_to_float::signed(i, int_to_float::u128_to_f64_bits)
}
}
diff --git a/src/int/mod.rs b/src/int/mod.rs
index 5f56c6b6e..d5f91d1d0 100644
--- a/src/int/mod.rs
+++ b/src/int/mod.rs
@@ -83,6 +83,7 @@ pub(crate) trait Int: MinInt
fn unsigned(self) -> Self::UnsignedInt;
fn from_unsigned(unsigned: Self::UnsignedInt) -> Self;
+ fn unsigned_abs(self) -> Self::UnsignedInt;
fn from_bool(b: bool) -> Self;
@@ -178,7 +179,6 @@ macro_rules! int_impl_common {
fn wrapping_mul(self, other: Self) -> Self {
<Self>::wrapping_mul(self, other)
}
-
fn wrapping_sub(self, other: Self) -> Self {
<Self>::wrapping_sub(self, other)
}
@@ -235,6 +235,10 @@ macro_rules! int_impl {
me
}
+ fn unsigned_abs(self) -> Self {
+ self
+ }
+
fn abs_diff(self, other: Self) -> Self {
if self < other {
other.wrapping_sub(self)
@@ -268,6 +272,10 @@ macro_rules! int_impl {
me as $ity
}
+ fn unsigned_abs(self) -> Self::UnsignedInt {
+ self.unsigned_abs()
+ }
+
fn abs_diff(self, other: Self) -> $uty {
self.wrapping_sub(other).wrapping_abs() as $uty
}
diff --git a/testcrate/tests/conv.rs b/testcrate/tests/conv.rs
index 609680387..01cc588cf 100644
--- a/testcrate/tests/conv.rs
+++ b/testcrate/tests/conv.rs
@@ -8,7 +8,7 @@ use compiler_builtins::float::Float;
use rustc_apfloat::{Float as _, FloatConvert as _};
use testcrate::*;
-mod int_to_float {
+mod i_to_f {
use super::*;
macro_rules! i_to_f {