From 985966e155e071b7db3578ed372da663ec5c003f Mon Sep 17 00:00:00 2001 From: Vincent La Date: Wed, 5 Jun 2024 22:25:33 -0700 Subject: [PATCH] Update README + unit tests RE: decimal parsing --- README.md | 6 ++++ include/internal/csv_row.cpp | 10 +++---- include/internal/csv_row.hpp | 10 ++++--- include/internal/data_type.hpp | 14 +++++----- single_include/csv.hpp | 36 ++++++++++++------------ single_include_test/csv.hpp | 51 +++++++++++++++++++++++++++------- tests/test_csv_field.cpp | 41 +++++++++++++++++++++++++++ 7 files changed, 123 insertions(+), 45 deletions(-) diff --git a/README.md b/README.md index 63bc8a9..493e7eb 100644 --- a/README.md +++ b/README.md @@ -205,6 +205,12 @@ for (auto& row: reader) { std::cout << "Hex value is " << value << std::endl; } + // Numbers that use a decimal separator other than '.' (e.g. ',') can be handled this way + long double decimalValue; + if (row["decimalNumber"].try_parse_decimal(decimalValue, ',')) { + std::cout << "Decimal value is " << decimalValue << std::endl; + } + // .. 
} } diff --git a/include/internal/csv_row.cpp b/include/internal/csv_row.cpp index d02a73a..93e9291 100644 --- a/include/internal/csv_row.cpp +++ b/include/internal/csv_row.cpp @@ -164,18 +164,16 @@ namespace csv { return true; } - // try_parse_decimal uses the specified decimal symbol and - // also sets the private members _type and value - CSV_INLINE bool CSVField::try_parse_decimal(long double& dVal, const char decimalsymbol) { + CSV_INLINE bool CSVField::try_parse_decimal(long double& dVal, const char decimalSymbol) { // If field has already been parsed to empty, no need to do it aagin: if (this->_type == DataType::CSV_NULL) return false; - // Not yet parsed or possibly parsed with other decimalsymbol + // Not yet parsed or possibly parsed with other decimalSymbol if (this->_type == DataType::UNKNOWN || this->_type == DataType::CSV_STRING || this->_type == DataType::CSV_DOUBLE) - this->_type = internals::data_type(this->sv, &this->value, decimalsymbol); // parse again + this->_type = internals::data_type(this->sv, &this->value, decimalSymbol); // parse again - // Integral types are not affected by decimalsymbol and need not be parsed again + // Integral types are not affected by decimalSymbol and need not be parsed again // Either we already had an integral type before, or we we just got any numeric type now. if (this->_type >= DataType::CSV_INT8 && this->_type <= DataType::CSV_DOUBLE) { diff --git a/include/internal/csv_row.hpp b/include/internal/csv_row.hpp index e2a09e5..bd92c0a 100644 --- a/include/internal/csv_row.hpp +++ b/include/internal/csv_row.hpp @@ -217,11 +217,13 @@ namespace csv { /** Parse a hexadecimal value, returning false if the value is not hex. */ bool try_parse_hex(int& parsedValue); - /** Parse a value, returning false if the value is not decimal. - * If true it also sets the private members _type and value. - * Decimal symbol may be given explicitly, default is '.'. 
+ /** Attempts to parse a decimal (or integer) value using the given symbol, + * returning `true` if the value is numeric. + * + * @note This method also updates this field's type + * */ - bool try_parse_decimal(long double& dVal, const char decimalsymbol = '.'); + bool try_parse_decimal(long double& dVal, const char decimalSymbol = '.'); /** Compares the contents of this field to a numeric value. If this * field does not contain a numeric value, then all comparisons return diff --git a/include/internal/data_type.hpp b/include/internal/data_type.hpp index 4fd442d..01ac7e2 100644 --- a/include/internal/data_type.hpp +++ b/include/internal/data_type.hpp @@ -235,11 +235,11 @@ namespace csv { * @param[in] in String value to be examined * @param[out] out Pointer to long double where results of numeric parsing * get stored - * @param[in] decimalsymbol the character separating integral and decimal part, + * @param[in] decimalSymbol the character separating integral and decimal part, * defaults to '.' if omitted */ CONSTEXPR_14 - DataType data_type(csv::string_view in, long double* const out, const char decimalsymbol) { + DataType data_type(csv::string_view in, long double* const out, const char decimalSymbol) { // Empty string --> NULL if (in.size() == 0) return DataType::CSV_NULL; @@ -285,7 +285,7 @@ namespace csv { is_negative = true; break; - // case decimalsymbol: not allowed because decimalsymbol is not a literal, + // case decimalSymbol: not allowed because decimalSymbol is not a literal, // it is handled in the default block case 'e': case 'E': @@ -325,10 +325,10 @@ namespace csv { else integral_part = (integral_part * 10) + digit; } - // case decimalymbol: not allowed because decimalsymbol is not a literal. - else if (dot_allowed && current == decimalsymbol) { - dot_allowed = false; - prob_float = true; + // case decimalSymbol: not allowed because decimalSymbol is not a literal. 
+ else if (dot_allowed && current == decimalSymbol) { + dot_allowed = false; + prob_float = true; } else { return DataType::CSV_STRING; diff --git a/single_include/csv.hpp b/single_include/csv.hpp index 505a8b2..83f64ee 100644 --- a/single_include/csv.hpp +++ b/single_include/csv.hpp @@ -5295,11 +5295,11 @@ namespace csv { * @param[in] in String value to be examined * @param[out] out Pointer to long double where results of numeric parsing * get stored - * @param[in] decimalsymbol the character separating integral and decimal part, + * @param[in] decimalSymbol the character separating integral and decimal part, * defaults to '.' if omitted */ CONSTEXPR_14 - DataType data_type(csv::string_view in, long double* const out, const char decimalsymbol) { + DataType data_type(csv::string_view in, long double* const out, const char decimalSymbol) { // Empty string --> NULL if (in.size() == 0) return DataType::CSV_NULL; @@ -5345,7 +5345,7 @@ namespace csv { is_negative = true; break; - // case decimalsymbol: not allowed because decimalsymbol is not a literal, + // case decimalSymbol: not allowed because decimalSymbol is not a literal, // it is handled in the default block case 'e': case 'E': @@ -5385,10 +5385,10 @@ namespace csv { else integral_part = (integral_part * 10) + digit; } - // case decimalymbol: not allowed because decimalsymbol is not a literal. - else if (dot_allowed && current == decimalsymbol) { - dot_allowed = false; - prob_float = true; + // case decimalSymbol: not allowed because decimalSymbol is not a literal. + else if (dot_allowed && current == decimalSymbol) { + dot_allowed = false; + prob_float = true; } else { return DataType::CSV_STRING; @@ -5612,11 +5612,13 @@ namespace csv { /** Parse a hexadecimal value, returning false if the value is not hex. */ bool try_parse_hex(int& parsedValue); - /** Parse a value, returning false if the value is not decimal. - * If true it also sets the private members _type and value. 
- * Decimal symbol may be given explicitly, default is '.'. + /** Attempts to parse a decimal (or integer) value using the given symbol, + * returning `true` if the value is numeric. + * + * @note This method also updates this field's type + * */ - bool try_parse_decimal(long double& dVal, const char decimalsymbol = '.'); + bool try_parse_decimal(long double& dVal, const char decimalSymbol = '.'); /** Compares the contents of this field to a numeric value. If this * field does not contain a numeric value, then all comparisons return @@ -7855,18 +7857,16 @@ namespace csv { return true; } - // try_parse_decimal uses the specified decimal symbol and - // also sets the private members _type and value - CSV_INLINE bool CSVField::try_parse_decimal(long double& dVal, const char decimalsymbol) { + CSV_INLINE bool CSVField::try_parse_decimal(long double& dVal, const char decimalSymbol) { // If field has already been parsed to empty, no need to do it aagin: if (this->_type == DataType::CSV_NULL) - return false; + return false; - // Not yet parsed or possibly parsed with other decimalsymbol + // Not yet parsed or possibly parsed with other decimalSymbol if (this->_type == DataType::UNKNOWN || this->_type == DataType::CSV_STRING || this->_type == DataType::CSV_DOUBLE) - this->_type = internals::data_type(this->sv, &this->value, decimalsymbol); // parse again + this->_type = internals::data_type(this->sv, &this->value, decimalSymbol); // parse again - // Integral types are not affected by decimalsymbol and need not be parsed again + // Integral types are not affected by decimalSymbol and need not be parsed again // Either we already had an integral type before, or we we just got any numeric type now. 
if (this->_type >= DataType::CSV_INT8 && this->_type <= DataType::CSV_DOUBLE) { diff --git a/single_include_test/csv.hpp b/single_include_test/csv.hpp index 96fe099..83f64ee 100644 --- a/single_include_test/csv.hpp +++ b/single_include_test/csv.hpp @@ -5151,7 +5151,8 @@ namespace csv { template<> inline DataType type_num() { return DataType::CSV_NULL; } template<> inline DataType type_num() { return DataType::CSV_STRING; } - CONSTEXPR_14 DataType data_type(csv::string_view in, long double* const out = nullptr); + CONSTEXPR_14 DataType data_type(csv::string_view in, long double* const out = nullptr, + const char decimalsymbol = '.'); #endif /** Given a byte size, return the largest number than can be stored in @@ -5294,9 +5295,11 @@ namespace csv { * @param[in] in String value to be examined * @param[out] out Pointer to long double where results of numeric parsing * get stored + * @param[in] decimalSymbol the character separating integral and decimal part, + * defaults to '.' if omitted */ CONSTEXPR_14 - DataType data_type(csv::string_view in, long double* const out) { + DataType data_type(csv::string_view in, long double* const out, const char decimalSymbol) { // Empty string --> NULL if (in.size() == 0) return DataType::CSV_NULL; @@ -5342,14 +5345,8 @@ namespace csv { is_negative = true; break; - case '.': - if (!dot_allowed) { - return DataType::CSV_STRING; - } - - dot_allowed = false; - prob_float = true; - break; + // case decimalSymbol: not allowed because decimalSymbol is not a literal, + // it is handled in the default block case 'e': case 'E': // Process scientific notation @@ -5388,6 +5385,11 @@ namespace csv { else integral_part = (integral_part * 10) + digit; } + // case decimalSymbol: not allowed because decimalSymbol is not a literal. 
+ else if (dot_allowed && current == decimalSymbol) { + dot_allowed = false; + prob_float = true; + } else { return DataType::CSV_STRING; } @@ -5610,6 +5612,14 @@ namespace csv { /** Parse a hexadecimal value, returning false if the value is not hex. */ bool try_parse_hex(int& parsedValue); + /** Attempts to parse a decimal (or integer) value using the given symbol, + * returning `true` if the value is numeric. + * + * @note This method also updates this field's type + * + */ + bool try_parse_decimal(long double& dVal, const char decimalSymbol = '.'); + /** Compares the contents of this field to a numeric value. If this * field does not contain a numeric value, then all comparisons return * false. @@ -7847,6 +7857,27 @@ namespace csv { return true; } + CSV_INLINE bool CSVField::try_parse_decimal(long double& dVal, const char decimalSymbol) { + // If field has already been parsed to empty, no need to do it aagin: + if (this->_type == DataType::CSV_NULL) + return false; + + // Not yet parsed or possibly parsed with other decimalSymbol + if (this->_type == DataType::UNKNOWN || this->_type == DataType::CSV_STRING || this->_type == DataType::CSV_DOUBLE) + this->_type = internals::data_type(this->sv, &this->value, decimalSymbol); // parse again + + // Integral types are not affected by decimalSymbol and need not be parsed again + + // Either we already had an integral type before, or we we just got any numeric type now. 
+ if (this->_type >= DataType::CSV_INT8 && this->_type <= DataType::CSV_DOUBLE) { + dVal = this->value; + return true; + } + + // CSV_NULL or CSV_STRING, not numeric + return false; + } + #ifdef _MSC_VER #pragma region CSVRow Iterator #endif diff --git a/tests/test_csv_field.cpp b/tests/test_csv_field.cpp index fcfd356..ce36056 100644 --- a/tests/test_csv_field.cpp +++ b/tests/test_csv_field.cpp @@ -141,6 +141,47 @@ TEST_CASE("CSVField try_parse_hex()", "[test_csv_field_parse_hex]") { } } + +TEST_CASE("CSVField try_parse_decimal()", "[test_csv_field_parse_decimal]") { + SECTION("Test try_parse_decimal() with non-numeric value") { + long double output = 0; + std::string input = "stroustrup"; + CSVField testField(input); + + REQUIRE(testField.try_parse_decimal(output, ',') == false); + REQUIRE(testField.type() == DataType::CSV_STRING); + } + + SECTION("Test try_parse_decimal() with integer value") { + long double output = 0; + std::string input = "2024"; + CSVField testField(input); + + REQUIRE(testField.try_parse_decimal(output, ',') == true); + REQUIRE(testField.type() == DataType::CSV_INT16); + REQUIRE(internals::is_equal(output, 2024.0l)); + } + + SECTION("Test try_parse_decimal() with various valid values") { + std::string input; + long double output = 0; + long double expected = 0; + + std::tie(input, expected) = + GENERATE(table( + csv_test::FLOAT_TEST_CASES)); + + // Replace '.' with ',' + std::replace(input.begin(), input.end(), '.', ','); + + CSVField testField(input); + + REQUIRE(testField.try_parse_decimal(output, ',') == true); + REQUIRE(testField.type() == DataType::CSV_DOUBLE); + REQUIRE(internals::is_equal(output, expected)); + } +} + TEMPLATE_TEST_CASE("CSVField get<>() - Disallow Float to Int", "[test_csv_field_get_float_as_int]", unsigned char, unsigned short, unsigned int, unsigned long long int, signed char, short, int, long long int) {