Skip to content

Commit

Permalink
Merge pull request #16 from vincentlaucsb/double-parse-fix
Browse files Browse the repository at this point in the history
Fixed floating point parsing bug #15
  • Loading branch information
vincentlaucsb authored Mar 26, 2019
2 parents a26a6bc + abaae6e commit ed53857
Show file tree
Hide file tree
Showing 3 changed files with 69 additions and 35 deletions.
63 changes: 29 additions & 34 deletions src/data_type.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ namespace csv::internals {
};
#endif

// Largest values of the signed integer types, converted once to long double
// so they can be compared directly against a parsed long double value.
// NOTE(review): names beginning with an underscore followed by an uppercase
// letter are reserved for the implementation; consider renaming these
// together with every place that uses them.
const long double _INT_MAX = static_cast<long double>(std::numeric_limits<int>::max());
const long double _LONG_MAX = static_cast<long double>(std::numeric_limits<long>::max());
const long double _LONG_LONG_MAX = static_cast<long double>(std::numeric_limits<long long>::max());

DataType data_type(std::string_view in, long double* const out) {
/** Distinguishes numeric from other text values. Used by various
* type casting functions, like csv_parser::CSVReader::read_row()
Expand All @@ -43,7 +47,8 @@ namespace csv::internals {
bool prob_float = false;

unsigned places_after_decimal = 0;
long double num_buff = 0;
long double integral_part = 0,
decimal_part = 0;

for (size_t i = 0, ilen = in.size(); i < ilen; i++) {
const char& current = in[i];
Expand All @@ -66,18 +71,16 @@ namespace csv::internals {
// Ex: '510-123-4567'
return CSV_STRING;
}
else {
neg_allowed = false;
}

neg_allowed = false;
break;
case '.':
if (!dot_allowed) {
return CSV_STRING;
}
else {
dot_allowed = false;
prob_float = true;
}

dot_allowed = false;
prob_float = true;
break;
default:
if (isdigit(current)) {
Expand All @@ -91,15 +94,12 @@ namespace csv::internals {

// Build current number
unsigned digit = current - '0';
if (num_buff == 0) {
num_buff = digit;
}
else if (prob_float) {
num_buff += (long double)digit / pow(10.0, ++places_after_decimal);
if (prob_float) {
places_after_decimal++;
decimal_part = (decimal_part * 10) + digit;
}
else {
num_buff *= 10;
num_buff += digit;
integral_part = (integral_part * 10) + digit;
}
}
else {
Expand All @@ -110,29 +110,24 @@ namespace csv::internals {

// No non-numeric/non-whitespace characters found
if (has_digit) {
if (!neg_allowed) num_buff *= -1;
if (out) *out = num_buff;
long double number = integral_part + decimal_part * pow(10, -(double)places_after_decimal);
if (out) *out = neg_allowed ? number : -number;

if (prob_float)
return CSV_DOUBLE;
else {
long double log10_num_buff;
if (!neg_allowed) log10_num_buff = log10(-num_buff);
else log10_num_buff = log10(num_buff);

if (log10_num_buff < log10(std::numeric_limits<int>::max()))
return CSV_INT;
else if (log10_num_buff < log10(std::numeric_limits<long int>::max()))
return CSV_LONG_INT;
else if (log10_num_buff < log10(std::numeric_limits<long long int>::max()))
return CSV_LONG_LONG_INT;
else // Conversion to long long will cause an overflow
return CSV_DOUBLE;
}
}
else {
// Just whitespace
return CSV_NULL;
// We can assume number is always positive
if (number < _INT_MAX)
return CSV_INT;
else if (number < _LONG_MAX)
return CSV_LONG_INT;
else if (number < _LONG_LONG_MAX)
return CSV_LONG_LONG_INT;
else // Conversion to long long will cause an overflow
return CSV_DOUBLE;
}

// Just whitespace
return CSV_NULL;
}
}
38 changes: 38 additions & 0 deletions tests/test_data_type.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,4 +53,42 @@ TEST_CASE( "Recognize Floats Properly", "[dtype_float]" ) {

REQUIRE(data_type(e, &out) == CSV_DOUBLE);
REQUIRE(is_equal(out, 2.71828));
}

TEST_CASE("Integer Overflow", "[int_overflow]") {
    // Checks that a value one past the largest int is no longer reported as
    // CSV_INT, and that the parsed value is written to `out`.
    //
    // The limits are kept as long long under ordinary local names:
    // identifiers that start with an underscore followed by an uppercase
    // letter (such as _INT_MAX) are reserved for the implementation.
    constexpr long long int_max = std::numeric_limits<int>::max();
    constexpr long long long_max = std::numeric_limits<long int>::max();
    constexpr long long just_past_int = int_max + 1;

    const std::string s = std::to_string(just_past_int);
    long double out = 0;

    if (int_max == long_max) {
        // long is no wider than int on this platform, so the next
        // category up is long long
        REQUIRE(data_type(s, &out) == CSV_LONG_LONG_INT);
    }
    else {
        REQUIRE(data_type(s, &out) == CSV_LONG_INT);
    }

    REQUIRE(out == just_past_int);
}

TEST_CASE( "Recognize Sub-Unit Double Values", "[regression_double]" ) {
    // "0.15" has a zero integral part; it must still be typed as a double
    // and parsed to the expected value.
    const std::string text = "0.15";
    long double parsed;

    REQUIRE(data_type(text, &parsed) == CSV_DOUBLE);
    REQUIRE(is_equal(parsed, 0.15));
}

TEST_CASE( "Recognize Double Values", "[regression_double2]" ) {
    // Round-trips 0.00, 0.01, ..., 2.00 through std::to_string() and
    // data_type(), checking both the detected type and the parsed value.
    //
    // The loop is driven by an integer counter. A floating point counter
    // picks up rounding error on every `+= 0.01`, so the number of
    // iterations, and whether the 2.0 endpoint is tested at all, would
    // depend on how that error happens to accumulate.
    long double out;
    std::string s;

    for (int hundredths = 0; hundredths <= 200; ++hundredths) {
        const double i = hundredths / 100.0;
        s = std::to_string(i);
        REQUIRE(data_type(s, &out) == CSV_DOUBLE);
        REQUIRE(is_equal(out, i));
    }
}
3 changes: 2 additions & 1 deletion tests/test_read_csv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -291,8 +291,9 @@ TEST_CASE("Test read_row() CSVField - Memory", "[read_row_csvf2]") {
// Fourth Row
rows.pop_front();
row = rows.front();
double big_num_csv = row[0].get<double>();
REQUIRE(row[0].type() == CSV_DOUBLE); // Overflow
REQUIRE(internals::is_equal(row[0].get<double>(), big_num));
REQUIRE(internals::is_equal(big_num_csv, big_num));
}

TEST_CASE("Test read_row() CSVField - Power Status", "[read_row_csvf3]") {
Expand Down

0 comments on commit ed53857

Please sign in to comment.