From d12eac8aac60057e03647f9e11b0737d0d26870f Mon Sep 17 00:00:00 2001
From: "L. E. Segovia"
Date: Fri, 12 Jan 2024 01:44:10 +0000
Subject: [PATCH] substrate: Add basic file utility to read all the lines from a file

---
 substrate/file_utils |  69 +++++++++++++++++++++++++++
 test/file_utils.cxx  | 110 +++++++++++++++++++++++++++++++++++++++++++
 test/meson.build     |   2 +-
 3 files changed, 180 insertions(+), 1 deletion(-)
 create mode 100644 substrate/file_utils
 create mode 100644 test/file_utils.cxx

diff --git a/substrate/file_utils b/substrate/file_utils
new file mode 100644
index 00000000..b5f91797
--- /dev/null
+++ b/substrate/file_utils
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: BSD-3-Clause
+
+#ifndef SUBSTRATE_FILE_UTILS
+#define SUBSTRATE_FILE_UTILS
+
+#include <algorithm>
+#include <iterator>
+#include <string>
+#include <vector>
+#include <substrate/fd>
+#include <substrate/mmap>
+
+namespace substrate
+{
+	template<typename Str = std::string>
+	std::vector<Str> read_lines(fd_t &file, typename Str::value_type separator = '\n') noexcept
+	{
+		using char_t = typename Str::value_type;
+
+		if (!file.valid())
+			return {};
+
+		const auto map{file.map(PROT_READ)};
+		if (!map.valid())
+			return {};
+
+		std::vector<Str> result;
+
+		const auto *const cbegin{map.address<char_t>()};
+
+		const auto *const cend
+		{
+			[&]()
+			{
+				const auto *begin{cbegin};
+				std::advance(begin, map.length() / sizeof(char_t));
+				return begin;
+			}()
+		};
+
+		for (const auto *begin{cbegin}; begin != cend;)
+		{
+			const auto *boundary
+			{
+				std::find_if
+				(
+					begin,
+					cend,
+					[&](const auto chr) { return chr == separator || (separator == '\n' && chr == '\r'); }
+				)
+			};
+
+			result.emplace_back(begin, boundary);
+
+			if (boundary != cend) {
+				if (separator == '\n' && *boundary == '\r' && (boundary + 1) != cend && *(boundary + 1) == '\n')
+					std::advance(boundary, 2);
+				else
+					std::advance(boundary, 1);
+			}
+
+			begin = boundary;
+		}
+
+		return result;
+	}
+} // namespace substrate
+
+#endif // SUBSTRATE_FILE_UTILS
diff --git a/test/file_utils.cxx b/test/file_utils.cxx
new file mode 100644
index 00000000..d422e50a
--- /dev/null
+++ b/test/file_utils.cxx
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: BSD-3-Clause
+#include <array>
+#include <substrate/file_utils>
+#include <substrate/indexed_iterator>
+#include <catch2/catch_test_macros.hpp>
+
+using substrate::fd_t;
+using substrate::indexedIterator_t;
+using substrate::read_lines;
+
+TEST_CASE("lines reader", "[boundedIterator_t]")
+{
+	const std::array<std::string, 6> lines
+	{
+		{
+			"## Enforcement Responsibilities 1",
+			"",
+			"Community leaders are responsible for clarifying and enforcing our standards of",
+			"acceptable behavior and will take appropriate and fair corrective action in",
+			"response to any behavior that they deem inappropriate, threatening, offensive,",
+			"or harmful."
+		}
+	};
+
+	{
+		fd_t text{"info.txt", O_WRONLY | O_CREAT | O_EXCL | O_TEXT, substrate::normalMode};
+		REQUIRE(text.valid());
+		for (const auto &i : lines)
+		{
+			REQUIRE(text.write(i));
+			REQUIRE(text.write('\n'));
+		}
+	}
+
+	{
+		fd_t text{"info.txt", O_RDONLY | O_TEXT};
+		REQUIRE(text.valid());
+		// Memory mapping here will cause the conversion to be skipped
+		const auto input {read_lines(text)};
+		REQUIRE(lines.size() == input.size());
+		for (const auto &line : indexedIterator_t{lines})
+		{
+			const auto &contents {line.second};
+			const auto &input_line {input[line.first]};
+			REQUIRE(input_line.find_first_of('\n', input_line.size() - 1) == std::string::npos);
+			REQUIRE(input_line.find_first_of('\r', input_line.size() - 1) == std::string::npos);
+			REQUIRE(input_line == contents);
+		}
+	}
+
+	{
+		const auto result{unlink("info.txt")};
+		if (result != 0)
+			REQUIRE(errno == 0);
+		else
+			SUCCEED();
+	}
+}
+
+TEST_CASE("lines reader with std::u16string", "[boundedIterator_t]")
+{
+	const std::array<std::u16string, 6> lines
+	{{
+		u"## Enforcement Responsibilities 2",
+		u"",
+		u"Community leaders are responsible for clarifying and enforcing our standards of",
+		u"acceptable behavior and will take appropriate and fair corrective action in",
+		u"response to any behavior that they deem inappropriate, threatening, offensive,",
+		u"or harmful."
+	}};
+
+	{
+		fd_t text{"info.txt", O_WRONLY | O_CREAT | O_EXCL | O_BINARY, substrate::normalMode};
+		REQUIRE(text.valid());
+		for (const auto &line : lines)
+		{
+			REQUIRE(text.write(line.data(), line.size() * sizeof(std::u16string::value_type)));
+			REQUIRE(text.write(u'\n'));
+		}
+	}
+
+	{
+		fd_t text{"info.txt", O_RDONLY | O_BINARY};
+		REQUIRE(text.valid());
+		const auto input{substrate::read_lines<std::u16string>(text)};
+		REQUIRE(lines.size() == input.size());
+		for (const auto &line : indexedIterator_t{lines})
+		{
+			const auto &contents {line.second};
+			const auto &input_line {input[line.first]};
+			REQUIRE(input_line.find_first_of(u'\n', input_line.size() - 1) == std::u16string::npos);
+			REQUIRE(input_line.find_first_of(u'\r', input_line.size() - 1) == std::u16string::npos);
+			REQUIRE(input_line == contents);
+		}
+	}
+
+	{
+		const auto result{unlink("info.txt")};
+		if (result != 0)
+			REQUIRE(errno == 0);
+		else
+			SUCCEED();
+	}
+}
+
+TEST_CASE()
+{
+	unlink("info.txt");
+	SUCCEED();
+}
diff --git a/test/meson.build b/test/meson.build
index caeca856..c70c42b5 100644
--- a/test/meson.build
+++ b/test/meson.build
@@ -6,7 +6,7 @@ testSrcs = [
 	'buffer_utils.cxx', 'pointer_utils.cxx', 'crypto/twofish.cxx',
 	'crypto/sha256.cxx', 'crypto/sha512.cxx', 'zip_container.cxx',
 	'affinity.cxx', 'threaded_queue.cxx', 'thread_pool.cxx',
-	'mmap.cxx'
+	'mmap.cxx', 'file_utils.cxx'
 ]
 
 if target_machine.system() == 'linux'