From 9cd5b02b7baaeda0122ec55bc84c7c671f71817c Mon Sep 17 00:00:00 2001 From: Vincent La Date: Mon, 25 Mar 2019 15:47:29 -0700 Subject: [PATCH 1/4] Fixed #15 as well as increasing performance --- src/data_type.cpp | 66 ++++++++++++++++++---------------------- tests/test_data_type.cpp | 38 +++++++++++++++++++++++ 2 files changed, 67 insertions(+), 37 deletions(-) diff --git a/src/data_type.cpp b/src/data_type.cpp index 70c49563..acddbbfd 100644 --- a/src/data_type.cpp +++ b/src/data_type.cpp @@ -20,6 +20,10 @@ namespace csv::internals { }; #endif + const long double _INT_MAX = (long double)std::numeric_limits::max(); + const long double _LONG_MAX = (long double)std::numeric_limits::max(); + const long double _LONG_LONG_MAX = (long double)std::numeric_limits::max(); + DataType data_type(std::string_view in, long double* const out) { /** Distinguishes numeric from other text values. Used by various * type casting functions, like csv_parser::CSVReader::read_row() @@ -43,7 +47,7 @@ namespace csv::internals { bool prob_float = false; unsigned places_after_decimal = 0; - long double num_buff = 0; + unsigned long significand = 0; for (size_t i = 0, ilen = in.size(); i < ilen; i++) { const char& current = in[i]; @@ -66,18 +70,16 @@ namespace csv::internals { // Ex: '510-123-4567' return CSV_STRING; } - else { - neg_allowed = false; - } + + neg_allowed = false; break; case '.': if (!dot_allowed) { return CSV_STRING; } - else { - dot_allowed = false; - prob_float = true; - } + + dot_allowed = false; + prob_float = true; break; default: if (isdigit(current)) { @@ -91,16 +93,11 @@ namespace csv::internals { // Build current number unsigned digit = current - '0'; - if (num_buff == 0) { - num_buff = digit; - } - else if (prob_float) { - num_buff += (long double)digit / pow(10.0, ++places_after_decimal); - } - else { - num_buff *= 10; - num_buff += digit; + if (prob_float) { + places_after_decimal++; } + + significand = (significand * 10) + digit; } else { return CSV_STRING; @@ -110,29 +107,24 @@ namespace csv::internals { // No non-numeric/non-whitespace characters found if (has_digit) { - if (!neg_allowed) num_buff *= -1; - if (out) *out = num_buff; + long double number = significand * pow(10, -(double)places_after_decimal); + if (out) *out = neg_allowed ? number : -number; if (prob_float) return CSV_DOUBLE; - else { - long double log10_num_buff; - if (!neg_allowed) log10_num_buff = log10(-num_buff); - else log10_num_buff = log10(num_buff); - - if (log10_num_buff < log10(std::numeric_limits::max())) - return CSV_INT; - else if (log10_num_buff < log10(std::numeric_limits::max())) - return CSV_LONG_INT; - else if (log10_num_buff < log10(std::numeric_limits::max())) - return CSV_LONG_LONG_INT; - else // Conversion to long long will cause an overflow - return CSV_DOUBLE; - } - } - else { - // Just whitespace - return CSV_NULL; + + // We can assume number is always positive + if (number < _INT_MAX) + return CSV_INT; + else if (number < _LONG_MAX) + return CSV_LONG_INT; + else if (number < _LONG_LONG_MAX) + return CSV_LONG_LONG_INT; + else // Conversion to long long will cause an overflow + return CSV_DOUBLE; } + + // Just whitespace + return CSV_NULL; } } \ No newline at end of file diff --git a/tests/test_data_type.cpp b/tests/test_data_type.cpp index 91a39ab4..c5ec15c2 100644 --- a/tests/test_data_type.cpp +++ b/tests/test_data_type.cpp @@ -53,4 +53,42 @@ TEST_CASE( "Recognize Floats Properly", "[dtype_float]" ) { REQUIRE(data_type(e, &out) == CSV_DOUBLE); REQUIRE(is_equal(out, 2.71828)); +} + +TEST_CASE("Integer Overflow", "[int_overflow]") { + const long double _INT_MAX = (long double)std::numeric_limits::max(); + const long double _LONG_MAX = (long double)std::numeric_limits::max(); + const long double _LONG_LONG_MAX = (long double)std::numeric_limits::max(); + + std::string s; + long double out; + + s = std::to_string((long long)_INT_MAX + 1); + if (_INT_MAX == _LONG_MAX) { + REQUIRE(data_type(s, &out) == CSV_LONG_LONG_INT); + } + else { + REQUIRE(data_type(s, &out) == CSV_LONG_INT); + } + + REQUIRE(out == (long long)_INT_MAX + 1); +} + +TEST_CASE( "Recognize Sub-Unit Double Values", "[regression_double]" ) { + std::string s("0.15"); + long double out; + REQUIRE(data_type(s, &out) == CSV_DOUBLE); + REQUIRE(is_equal(out, 0.15)); +} + +TEST_CASE( "Recognize Double Values", "[regression_double2]" ) { + // Test converting double values back and forth + long double out; + std::string s; + + for (double i = 0; i <= 2.0; i += 0.01) { + s = std::to_string(i); + REQUIRE(data_type(s, &out) == CSV_DOUBLE); + REQUIRE(is_equal(out, i)); + } } \ No newline at end of file From 186c11c24194076326c17c897eaa86d6bd59d5fb Mon Sep 17 00:00:00 2001 From: Vincent La Date: Mon, 25 Mar 2019 15:55:33 -0700 Subject: [PATCH 2/4] Relocated problematic integer overflow tests elsewhere --- tests/test_read_csv.cpp | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/tests/test_read_csv.cpp b/tests/test_read_csv.cpp index 3104dad5..b11e0327 100644 --- a/tests/test_read_csv.cpp +++ b/tests/test_read_csv.cpp @@ -251,8 +251,6 @@ TEST_CASE("Test read_row() CSVField - Memory", "[read_row_csvf2]") { csv_string << "3.14,9999" << std::endl << "60,70" << std::endl << "," << std::endl - << (std::numeric_limits::max() - 100) << "," - << (std::numeric_limits::max()/2) << std::endl << std::to_string(big_num) << "," << std::endl; auto rows = parse(csv_string.str(), format); @@ -277,17 +275,6 @@ TEST_CASE("Test read_row() CSVField - Memory", "[read_row_csvf2]") { REQUIRE(row[0].is_null()); REQUIRE(row[1].is_null()); - // Fourth Row - rows.pop_front(); - row = rows.front(); - - // Older versions of g++ have issues with numeric_limits -#if (!defined(__GNUC__) || __GNUC__ >= 5) - REQUIRE((row[0].type() == CSV_INT || row[0].type() == CSV_LONG_INT)); - REQUIRE(row[0].get() == std::numeric_limits::max() - 100); - // REQUIRE(row[1].get() == std::numeric_limits::max()/2); -#endif - // Fourth Row rows.pop_front(); row = rows.front(); From c48781248295c1d52c21430c3e7a632327f6486f Mon Sep 17 00:00:00 2001 From: Vincent La Date: Mon, 25 Mar 2019 16:29:48 -0700 Subject: [PATCH 3/4] Revert "Relocated problematic integer overflow tests elsewhere" This reverts commit 186c11c24194076326c17c897eaa86d6bd59d5fb. --- tests/test_read_csv.cpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/test_read_csv.cpp b/tests/test_read_csv.cpp index b11e0327..3104dad5 100644 --- a/tests/test_read_csv.cpp +++ b/tests/test_read_csv.cpp @@ -251,6 +251,8 @@ TEST_CASE("Test read_row() CSVField - Memory", "[read_row_csvf2]") { csv_string << "3.14,9999" << std::endl << "60,70" << std::endl << "," << std::endl + << (std::numeric_limits::max() - 100) << "," + << (std::numeric_limits::max()/2) << std::endl << std::to_string(big_num) << "," << std::endl; auto rows = parse(csv_string.str(), format); @@ -275,6 +277,17 @@ TEST_CASE("Test read_row() CSVField - Memory", "[read_row_csvf2]") { REQUIRE(row[0].is_null()); REQUIRE(row[1].is_null()); + // Fourth Row + rows.pop_front(); + row = rows.front(); + + // Older versions of g++ have issues with numeric_limits +#if (!defined(__GNUC__) || __GNUC__ >= 5) + REQUIRE((row[0].type() == CSV_INT || row[0].type() == CSV_LONG_INT)); + REQUIRE(row[0].get() == std::numeric_limits::max() - 100); + // REQUIRE(row[1].get() == std::numeric_limits::max()/2); +#endif + // Fourth Row rows.pop_front(); row = rows.front(); From abaae6ea7587d12e63304c772285077a400826c8 Mon Sep 17 00:00:00 2001 From: Vincent La Date: Mon, 25 Mar 2019 16:58:43 -0700 Subject: [PATCH 4/4] Overflow errors with attempted fix --- src/data_type.cpp | 11 +++++++---- tests/test_read_csv.cpp | 3 ++- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/data_type.cpp b/src/data_type.cpp index acddbbfd..6db7117d 100644 --- a/src/data_type.cpp +++ b/src/data_type.cpp @@ -47,7 +47,8 @@ namespace csv::internals { bool prob_float = false; unsigned places_after_decimal = 0; - unsigned long significand = 0; + long double integral_part = 0, + decimal_part = 0; for (size_t i = 0, ilen = in.size(); i < ilen; i++) { const char& current = in[i]; @@ -95,9 +96,11 @@ namespace csv::internals { unsigned digit = current - '0'; if (prob_float) { places_after_decimal++; + decimal_part = (decimal_part * 10) + digit; + } + else { + integral_part = (integral_part * 10) + digit; } - - significand = (significand * 10) + digit; } else { return CSV_STRING; @@ -107,7 +110,7 @@ namespace csv::internals { // No non-numeric/non-whitespace characters found if (has_digit) { - long double number = significand * pow(10, -(double)places_after_decimal); + long double number = integral_part + decimal_part * pow(10, -(double)places_after_decimal); if (out) *out = neg_allowed ? number : -number; if (prob_float) diff --git a/tests/test_read_csv.cpp b/tests/test_read_csv.cpp index 3104dad5..aea128df 100644 --- a/tests/test_read_csv.cpp +++ b/tests/test_read_csv.cpp @@ -291,8 +291,9 @@ TEST_CASE("Test read_row() CSVField - Memory", "[read_row_csvf2]") { // Fourth Row rows.pop_front(); row = rows.front(); + double big_num_csv = row[0].get(); REQUIRE(row[0].type() == CSV_DOUBLE); // Overflow - REQUIRE(internals::is_equal(row[0].get(), big_num)); + REQUIRE(internals::is_equal(big_num_csv, big_num)); } TEST_CASE("Test read_row() CSVField - Power Status", "[read_row_csvf3]") {