Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve float conversion #1621

Merged
merged 2 commits into from
Aug 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 26 additions & 1 deletion include/clasp/core/bignum.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,12 @@ class Bignum_O : public Integer_O {
static Bignum_sp create(int64_t v) { return create_from_limbs((v < 0) ? -1 : 1, std::abs(v), true); }
#endif
static Bignum_sp create(uint64_t v) { return create_from_limbs(1, v, true); }
static Bignum_sp create(__uint128_t v) {
Bignum_sp b = create_from_limbs(2);
b->_limbs[0] = static_cast<mp_limb_t>(v);
b->_limbs[1] = static_cast<mp_limb_t>(v >> 64);
return b;
}
static Bignum_sp create(double d) {
// KLUDGE: there is no mpn function for converting from a double.
// However, this conses, which we shouldn't need to do.
Expand Down Expand Up @@ -133,7 +139,26 @@ class Bignum_O : public Integer_O {
virtual bool evenp_() const override { return !((this->limbs())[0] & 1); }
virtual bool oddp_() const override { return (this->limbs())[0] & 1; }

template <std::integral integral> integral to_integral() const {
template <std::unsigned_integral integral> integral to_integral() const {
constexpr auto limb_width = sizeof(mp_limb_t) * 8;
constexpr auto integral_width = std::bit_width(std::numeric_limits<integral>::max());
constexpr auto max_limb_count = 1 + ((sizeof(integral) - 1) / sizeof(mp_limb_t));

const mp_limb_t* limbs = this->limbs();
mp_size_t len = this->length();

if ((len > -1) && ((limb_width * (len - 1) + std::bit_width(limbs[len - 1])) <= integral_width)) {
integral value = 0;
for (mp_size_t i = 0; i < length(); i++)
value |= (integral)limbs[i] << (i * limb_width);
return value;
}

TYPE_ERROR(this->asSmartPtr(), Cons_O::createList(cl::_sym_Integer_O, Integer_O::create(std::numeric_limits<integral>::min()),
Integer_O::create(std::numeric_limits<integral>::max())));
}

template <std::signed_integral integral> integral to_integral() const {
integral mn = std::numeric_limits<integral>::min();
integral mx = std::numeric_limits<integral>::max();
// First, if integral can only hold fixnums, conversion will always fail.
Expand Down
165 changes: 165 additions & 0 deletions include/clasp/core/float_util.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
#pragma once

#include <fenv.h>

namespace core {

template <typename Float> struct float_convert {
static constexpr uint16_t significand_width = std::numeric_limits<Float>::digits;
static constexpr uint16_t exponent_width = std::bit_width((unsigned int)std::numeric_limits<Float>::max_exponent);
static constexpr uint16_t sign_width = 1;
static constexpr bool has_hidden_bit = ((sign_width + exponent_width + significand_width) % 8) != 0;
static constexpr uint16_t storage_width = sign_width + exponent_width + significand_width + ((has_hidden_bit) ? -1 : 0);
static constexpr int32_t exponent_bias = std::numeric_limits<Float>::max_exponent + significand_width - 2;
static constexpr int32_t max_exponent = std::numeric_limits<Float>::max_exponent - significand_width;
static constexpr int32_t min_exponent = 2 - exponent_bias - significand_width;
static constexpr int32_t min_normalized_exponent = 1 - exponent_bias;
using uint_t =
std::conditional_t<storage_width <= 8, uint8_t,
std::conditional_t<storage_width <= 16, uint16_t,
std::conditional_t<storage_width <= 32, uint32_t,
std::conditional_t<storage_width <= 64, uint64_t, __uint128_t>>>>;
static constexpr uint_t hidden_bit = has_hidden_bit ? uint_t{1} << (significand_width - 1) : uint_t{0};
static constexpr uint16_t exponent_shift = storage_width - sign_width - exponent_width;
static constexpr uint16_t sign_shift = storage_width - sign_width;
static constexpr uint_t significand_mask = (uint_t{1} << (significand_width + ((has_hidden_bit) ? -1 : 0))) - uint_t{1};
static constexpr uint_t payload_mask = (uint_t{1} << (significand_width + ((has_hidden_bit) ? -2 : -1))) - uint_t{1};
static constexpr int32_t non_finite_exponent = static_cast<int32_t>(((uint_t{1} << exponent_width) - uint_t{1}));
static constexpr uint_t exponent_mask = ((uint_t{1} << exponent_width) - uint_t{1}) << exponent_shift;
static constexpr uint_t sign_mask = ((uint_t{1} << sign_width) - uint_t{1}) << sign_shift;
static constexpr uint_t nan_type_mask = uint_t{1} << (significand_width + ((has_hidden_bit) ? -2 : -1));

enum class category { finite, quiet_nan, signaling_nan, infinity };

typedef union {
Float f;
uint_t b;
} convert_t;

static inline uint_t to_bits(Float f) {
convert_t convert = {.f = f};
return convert.b;
}

static inline Float from_bits(uint_t b) {
convert_t convert = {.b = b};
return convert.f;
}

struct quadruple {
category category;
uint_t significand;
int32_t exponent;
int16_t sign;
};

static quadruple to_quadruple(Float f) {
quadruple q;
uint_t b = to_bits(f);

q.significand = b & significand_mask;
q.exponent = static_cast<int32_t>((b & exponent_mask) >> exponent_shift);
q.sign = (b & sign_mask) ? int32_t{-1} : int32_t{1};

if (q.exponent != non_finite_exponent) {
q.category = category::finite;
if (q.exponent != 0) {
// Normal non-zero
q.significand |= hidden_bit;
q.exponent -= exponent_bias;
} else if (q.significand != 0) {
// Subnormal
int32_t shift = significand_width - std::bit_width(q.significand);
q.significand = q.significand << shift;
q.exponent = 1 - exponent_bias - shift;
}
} else if (q.significand == 0) {
q.category = category::infinity;
} else if (q.significand & nan_type_mask) {
q.category = category::quiet_nan;
q.significand &= ~nan_type_mask;
} else {
q.category = category::signaling_nan;
}

return q;
}

static Float from_quadruple(quadruple q) {
uint_t b = 0;

if (q.sign < 0)
b |= sign_mask;

switch (q.category) {
case category::infinity:
b |= exponent_mask;
break;
case category::quiet_nan:
b |= exponent_mask | nan_type_mask | (q.significand & payload_mask);
break;
case category::signaling_nan:
b |= exponent_mask | ((q.significand == 0) ? uint_t{1} : (q.significand & payload_mask));
break;
default:
if (q.significand != 0) {
uint_t significand = q.significand;
int32_t exponent = q.exponent;
int32_t shift = std::bit_width(significand) - significand_width;

// If we don't have enough bits then right shift.
if (shift < 0) {
significand = significand << -shift;
exponent += shift;
shift = 0;
}

// Check for subnormals and set the shift needed to normalize.
if ((exponent + shift) < min_normalized_exponent) {
shift = min_normalized_exponent - exponent;
}

// If we shift away all of the bits that is an underflow.
if (shift > std::bit_width(significand)) {
feraiseexcept(FE_UNDERFLOW);
// Return +/- zero if traps masked.
return from_bits(b);
}

// Round if we have extra bits.
if (shift > 0) {
if (significand & ((1 << shift) - 1))
feraiseexcept(FE_INEXACT);
significand = ((significand & (1 << (shift - 1))) ? (significand + (1 << shift)) : significand) >> shift;
exponent += shift;
}

// Check one more time to ensure rounding hasn't increased the width.
shift = std::max(static_cast<int>(std::bit_width(significand) - significand_width), 0);
significand = significand >> shift;
exponent += shift;

// Check for overflow.
if (exponent > max_exponent) {
feraiseexcept(FE_OVERFLOW);
// Return +/- infinity if traps masked.
return from_bits(b | exponent_mask);
}

if (std::bit_width(significand) < significand_width) {
// Subnormals
b |= significand & significand_mask;
} else {
// Normal
b |= (significand & significand_mask) |
((static_cast<uint_t>(exponent + exponent_bias) << exponent_shift) & exponent_mask);
}
}
break;
}

return from_bits(b);
};
};

} // namespace core
36 changes: 0 additions & 36 deletions include/clasp/core/foundation.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,46 +51,10 @@ THE SOFTWARE.

namespace core {
[[noreturn]] void lisp_throwLispError(const std::string& str);
[[noreturn]] void lisp_floating_point_invalid_operation();
[[noreturn]] void lisp_error_simple(const char* functionName, const char* fileName, int lineNumber, const string& fmt);
[[noreturn]] void lisp_error_simple(const char* functionName, const char* fileName, int lineNumber, const std::string& str);
void lisp_debugLogWrite(const char* fileName, const char* funcName, uint lineNumber, uint column, const std::string& message,
uint debugFlags = DEBUG_CPP_FUNCTION);

template <typename Float> struct float_convert {
static constexpr uint16_t significand_width = std::numeric_limits<Float>::digits;
static constexpr uint16_t exponent_width = std::bit_width((unsigned int)std::numeric_limits<Float>::max_exponent);
static constexpr uint16_t sign_width = 1;
static constexpr bool has_hidden_bit = ((sign_width + exponent_width + significand_width) % 8) != 0;
static constexpr uint16_t storage_width = sign_width + exponent_width + significand_width + ((has_hidden_bit) ? -1 : 0);
static constexpr int32_t exponent_bias = std::numeric_limits<Float>::max_exponent + significand_width - 2;
using uint_t =
std::conditional_t<storage_width <= 8, uint8_t,
std::conditional_t<storage_width <= 16, uint16_t,
std::conditional_t<storage_width <= 32, uint32_t,
std::conditional_t<storage_width <= 64, uint64_t, __uint128_t>>>>;
static constexpr uint16_t exponent_shift = storage_width - sign_width - exponent_width;
static constexpr uint16_t sign_shift = storage_width - sign_width;
static constexpr uint_t significand_mask = (uint_t{1} << (significand_width + ((has_hidden_bit) ? -1 : 0))) - uint_t{1};
static constexpr uint_t exponent_mask = ((uint_t{1} << exponent_width) - uint_t{1}) << exponent_shift;
static constexpr uint_t sign_mask = ((uint_t{1} << sign_width) - uint_t{1}) << sign_shift;

typedef union {
Float f;
uint_t b;
} convert_t;

static inline uint_t to_bits(Float f) {
convert_t convert = {.f = f};
return convert.b;
}

static inline Float from_bits(uint_t b) {
convert_t convert = {.b = b};
return convert.f;
}
};

}; // namespace core

template <typename Char> struct fmt::formatter<core::T_sp, Char> : fmt::formatter<fmt::basic_string_view<Char>> {
Expand Down
1 change: 1 addition & 0 deletions include/clasp/core/object.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ THE SOFTWARE.

#include <fmt/format.h>
#include <fmt/ostream.h>
#include <clasp/core/float_util.h>
#include <clasp/core/newhash.h>
#include <clasp/core/commonLispPackage.fwd.h>
#include <clasp/core/corePackage.fwd.h>
Expand Down
50 changes: 27 additions & 23 deletions src/core/bignum.cc
Original file line number Diff line number Diff line change
Expand Up @@ -799,35 +799,39 @@ Number_sp Bignum_O::oneMinus_() const { return next_fadd(this->limbs(), this->le

Number_sp Bignum_O::onePlus_() const { return next_fadd(this->limbs(), this->length(), 1); }

double next_to_double(mp_size_t len, const mp_limb_t* limbs) {
// MPN does not seem to export a float conversion function,
// so we roll our own. FIXME: Could get bad with float rounding
// or otherwise as I'm making this up as I go.
template <typename Float> Float limbs_to_float(mp_size_t len, const mp_limb_t* limbs) {
constexpr size_t limb_width = sizeof(mp_limb_t) * 8;
mp_size_t size = std::abs(len);
struct float_convert<Float>::quadruple q = {
.category = float_convert<Float>::category::finite, .significand = 0, .exponent = (size - 1) * limb_width,
.sign = (len < 0) ? -1 : 1
};
size_t shift = float_convert<Float>::significand_width + 1;
size_t width = std::bit_width(limbs[size - 1]);

if (width >= shift) {
q.significand = limbs[size - 1] >> (width - shift);
q.exponent += width - shift;
} else {
q.significand = limbs[size - 1];
shift -= width;

for (mp_size_t i = size - 2; i > -1 && shift > 0; i--) {
size_t limb_shift = std::min(shift, limb_width);
q.significand = (q.significand << limb_shift) | (limbs[i] >> (limb_width - limb_shift));
q.exponent -= limb_shift;
shift -= limb_shift;
}
}

// There are no length zero bignums.
// If the bignum is only one long, we just convert directly.
if (len == 1)
return static_cast<double>(limbs[0]);
else if (len == -1)
return -static_cast<double>(limbs[0]);

// Otherwise, we just use the most significant two limbs.
// Assuming the float format has at most 128 bits of significand,
// the lower limbs ought to be irrelevant.
// (In fact one 64-bit number would probably be enough, but
// it's possible the high bits of this number are almost all zero.)
double ultimate = static_cast<double>(limbs[size - 1]);
double penultimate = static_cast<double>(limbs[size - 2]);
double soon = penultimate + std::ldexp(ultimate, 64);
return std::ldexp(((len < 0) ? -soon : soon), 64 * (size - 2));
return float_convert<Float>::from_quadruple(q);
}

float Bignum_O::as_float_() const { return static_cast<float>(mpz_get_d(this->mpz().get_mpz_t())); }
float Bignum_O::as_float_() const { return limbs_to_float<float>(this->length(), this->limbs()); }

double Bignum_O::as_double_() const { return mpz_get_d(this->mpz().get_mpz_t()); }
double Bignum_O::as_double_() const { return limbs_to_float<double>(this->length(), this->limbs()); }

LongFloat Bignum_O::as_long_float_() const { return static_cast<LongFloat>(next_to_double(this->length(), this->limbs())); }
LongFloat Bignum_O::as_long_float_() const { return limbs_to_float<LongFloat>(this->length(), this->limbs()); }

DOCGROUP(clasp);
CL_DEFUN int core__next_compare(Bignum_sp left, Bignum_sp right) {
Expand Down
4 changes: 0 additions & 4 deletions src/core/foundation.cc
Original file line number Diff line number Diff line change
Expand Up @@ -214,10 +214,6 @@ CL_DEFUN void core__dump_class_ids() { reg::dump_class_ids(); }

SYMBOL_EXPORT_SC_(ClPkg,floating_point_invalid_operation);

void lisp_floating_point_invalid_operation() {
lisp_error(cl::_sym_floating_point_invalid_operation,nil<core::T_O>());
}

void lisp_errorExpectedList(core::T_O* v) {
T_sp tv((gctools::Tagged)v);
TYPE_ERROR(tv, cl::_sym_list);
Expand Down
Loading
Loading