From 1f7119f5a11d3772d24c0ce4e1883d35e20a30d1 Mon Sep 17 00:00:00 2001 From: yangli Date: Fri, 16 Sep 2022 17:24:43 -0500 Subject: [PATCH] finish boss-fpsp --- include/CMakeLists.txt | 51 ++++++++++++++------------- include/boss/fq_utils.hpp | 8 +++-- source/fq_utils.cpp | 62 +++++++++++++++++++-------------- standalone/fqsp/source/main.cpp | 4 +-- test/source/test_fqsp.cpp | 51 +++++++++++++++++++++++++-- 5 files changed, 116 insertions(+), 60 deletions(-) diff --git a/include/CMakeLists.txt b/include/CMakeLists.txt index 5da5847..25200e1 100644 --- a/include/CMakeLists.txt +++ b/include/CMakeLists.txt @@ -4,19 +4,19 @@ cmake_minimum_required(VERSION 3.14...3.22) # Note: update this to your new project's name and version project( - boss - VERSION 1.0.0 - LANGUAGES CXX + boss + VERSION 1.0.0 + LANGUAGES CXX ) # ---- Include guards ---- -if (PROJECT_SOURCE_DIR STREQUAL PROJECT_BINARY_DIR) - message( - FATAL_ERROR - "In-source builds not allowed. Please make a new directory (called a build directory) and run CMake from there." - ) -endif () +if(PROJECT_SOURCE_DIR STREQUAL PROJECT_BINARY_DIR) + message( + FATAL_ERROR + "In-source builds not allowed. Please make a new directory (called a build directory) and run CMake from there." + ) +endif() option(BOSS_HAS_BOOST "Do not download Boost" OFF) @@ -34,12 +34,11 @@ include(../cmake/CPM.cmake) CPMAddPackage("gh:TheLartians/PackageProject.cmake@1.8.0") CPMAddPackage( - GITHUB_REPOSITORY ylab-hi/BINARY - VERSION 1.1.0 - OPTIONS "BINARY_BUILD_STANDALONE NO" "BINARY_BUILD_TESTS NO" + GITHUB_REPOSITORY ylab-hi/BINARY + VERSION 1.1.0 + OPTIONS "BINARY_BUILD_STANDALONE NO" "BINARY_BUILD_TESTS NO" ) - find_package(Boost REQUIRED COMPONENTS system iostreams) # ---- Add source files ---- @@ -61,11 +60,13 @@ set_target_properties(${PROJECT_NAME} PROPERTIES CXX_STANDARD 20) target_compile_options(${PROJECT_NAME} PUBLIC "$<$:/permissive->") # Link dependencies -target_link_libraries(${PROJECT_NAME} PUBLIC spdlog::spdlog Boost::boost Boost::iostreams binary::binary) +target_link_libraries( + ${PROJECT_NAME} PUBLIC spdlog::spdlog Boost::boost Boost::iostreams binary::binary +) target_include_directories( - ${PROJECT_NAME} PUBLIC $ - $ + ${PROJECT_NAME} PUBLIC $ + $ ) # ---- Create an installable target ---- @@ -76,13 +77,13 @@ target_include_directories( string(TOLOWER ${PROJECT_NAME}/version.h VERSION_HEADER_LOCATION) packageProject( - NAME ${PROJECT_NAME} - VERSION ${PROJECT_VERSION} - NAMESPACE ${PROJECT_NAME} - BINARY_DIR ${PROJECT_BINARY_DIR} - INCLUDE_DIR . - INCLUDE_DESTINATION ../include/${PROJECT_NAME}-${PROJECT_VERSION} - VERSION_HEADER "${VERSION_HEADER_LOCATION}" - COMPATIBILITY SameMajorVersion - DEPENDENCIES "spdlog 1.10.0" + NAME ${PROJECT_NAME} + VERSION ${PROJECT_VERSION} + NAMESPACE ${PROJECT_NAME} + BINARY_DIR ${PROJECT_BINARY_DIR} + INCLUDE_DIR . + INCLUDE_DESTINATION ../include/${PROJECT_NAME}-${PROJECT_VERSION} + VERSION_HEADER "${VERSION_HEADER_LOCATION}" + COMPATIBILITY SameMajorVersion + DEPENDENCIES "spdlog 1.10.0" ) diff --git a/include/boss/fq_utils.hpp b/include/boss/fq_utils.hpp index 9d460e2..849500e 100644 --- a/include/boss/fq_utils.hpp +++ b/include/boss/fq_utils.hpp @@ -42,7 +42,7 @@ namespace boss::fqsp { [[nodiscard]] bool is_skip(int c) { if (c == '\n') { ++line_count_; // count lines 0-based index - if(saved_line_count_ == 0) return true; + if (saved_line_count_ == 0) return true; } if (FqDirection::Forward == direction_) { @@ -70,13 +70,15 @@ namespace boss::fqsp { void split_fq_impl_txt(const fs::path& input, FqDirection direction); + [[nodiscard]] bool check_filename(std::string_view filename); + } // namespace details //--------------Definitions of functions for splitting ncbi fq files----// void split_fq(const fs::path& input); - void test_gz_read(const fs::path& path); - void test_gz_write(const fs::path& path); + + using details::check_filename; } // namespace boss::fqsp diff --git a/source/fq_utils.cpp b/source/fq_utils.cpp index fd6c5a8..b7c999d 100644 --- a/source/fq_utils.cpp +++ b/source/fq_utils.cpp @@ -1,9 +1,10 @@ +#include #include #include #include #include -#include #include +#include #include namespace boss::fqsp { @@ -14,7 +15,7 @@ namespace boss::fqsp { io::filtering_istream in; in.push(fq_filter(direction)); in.push(io::file_source(input.string(), std::ios_base::in)); - std::ofstream out{fmt::format("{}{}{}", input.stem().string(), + std::ofstream out{fmt::format("{}.{}{}", input.stem().string(), direction == FqDirection::Forward ? "1" : "2", input.extension().string())}; io::copy(in, out); @@ -24,10 +25,9 @@ namespace boss::fqsp { void details::split_fq_impl_gz(const fs::path& input, FqDirection direction) { spdlog::debug("Processing file: {} {}", input.string(), direction == FqDirection::Forward ? "Forward" : "Reverse"); - std::ofstream output_file{ - fmt::format("{}{}{}", input.stem().string(), direction == FqDirection::Forward ? "1" : "2", - input.extension().string()), - std::ios_base::out | std::ios_base::binary}; + std::ofstream output_file{fmt::format("{}.{}{}", input.stem().stem().string(), + direction == FqDirection::Forward ? "1" : "2", ".fq.gz"), + std::ios_base::out | std::ios_base::binary}; io::filtering_istream in; in.push(fqsp::fq_filter(direction)); @@ -44,33 +44,41 @@ namespace boss::fqsp { bool details::is_gzip(const fs::path& input) { return input.extension() == ".gz"; } + bool details::check_filename(std::string_view filename) { + if (!binary::utils::check_file_path(filename)) { + spdlog::error("File {} does not exist", filename); + return false; + } + + auto path = fs::path(filename); + + if (is_gzip(path)) { + if (!(path.stem().extension() == ".fq")) { + spdlog::error("File {} is not a valid .fq.gz file", filename); + return false; + } + return true; + } + + if (!(path.extension() == ".fq")) { + spdlog::error("File {} is not a valid .fq file", filename); + return false; + } + + return true; + } + void split_fq(const fs::path& input) { + std::future future; if (details::is_gzip(input)) { - details::split_fq_impl_gz(input, FqDirection::Forward); + future = std::async(details::split_fq_impl_gz, input, FqDirection::Forward); + // main thread details::split_fq_impl_gz(input, FqDirection::Reverse); } else { - details::split_fq_impl_txt(input, FqDirection::Forward); + future = std::async(details::split_fq_impl_txt, input, FqDirection::Forward); details::split_fq_impl_txt(input, FqDirection::Reverse); } + future.wait(); } - void test_gz_write(const fs::path& path) { - std::ofstream out{fmt::format("{}{}", path.string(), ".gz")}; - - io::filtering_ostream fis; - fis.push(io::gzip_compressor()); - fis.push(out); - - fis << "Hello World!\n"; - io::close(fis); - out.close(); - } - - void test_gz_read(const fs::path& path) { - std::ifstream file(path.string(), std::ios_base::in | std::ios_base::binary); - io::filtering_streambuf in; - in.push(io::gzip_decompressor()); - in.push(file); - boost::iostreams::copy(in, std::cout); - } } // namespace boss::fqsp diff --git a/standalone/fqsp/source/main.cpp b/standalone/fqsp/source/main.cpp index 7e9efb1..1acf77c 100644 --- a/standalone/fqsp/source/main.cpp +++ b/standalone/fqsp/source/main.cpp @@ -42,11 +42,9 @@ auto main(int argc, char** argv) -> int { try { auto input = result["input"].as(); - if (!binary::utils::check_file_path(input)) { - spdlog::error("Input file does not exist: {}", input); + if (!boss::fqsp::check_filename(input)) { std::exit(1); } - boss::fqsp::split_fq(fs::path(input)); } catch (const cxxopts::OptionException& e) { diff --git a/test/source/test_fqsp.cpp b/test/source/test_fqsp.cpp index 96668ee..c75746d 100644 --- a/test/source/test_fqsp.cpp +++ b/test/source/test_fqsp.cpp @@ -2,7 +2,12 @@ #include #include +#include +#include +#include +#include #include +#include TEST_SUITE("fqsp") { namespace bp = boss::fqsp; @@ -11,12 +16,54 @@ TEST_SUITE("fqsp") { constexpr std::string_view input_gz = "../../test/data/test.fq.gz"; constexpr std::string_view input = "../../test/data/test.fq"; + void test_gz_write(const fs::path& path) { + namespace io = boost::iostreams; + std::ofstream out{fmt::format("{}{}", path.string(), ".gz")}; + + io::filtering_ostream fis; + fis.push(io::gzip_compressor()); + fis.push(out); + + fis << "Hello World!\n"; + io::close(fis); + out.close(); + } + + void test_gz_read(const fs::path& path) { + namespace io = boost::iostreams; + std::ifstream file(path.string(), std::ios_base::in | std::ios_base::binary); + io::filtering_streambuf in; + in.push(io::gzip_decompressor()); + in.push(file); + io::copy(in, std::cout); + } + TEST_CASE("is_gzip") { - spdlog::info("current path: {}", fs::current_path().string()); CHECK(bu::check_file_path({input_gz, input})); CHECK(bp::details::is_gzip(input_gz)); CHECK_FALSE(bp::details::is_gzip(input)); } - TEST_CASE("read_fq") { CHECK_NOTHROW(bp::test_gz_read(input_gz)); } + TEST_CASE("read_fq") { CHECK_NOTHROW(test_gz_read(input_gz)); } + TEST_CASE("write_fq") { + CHECK_NOTHROW(test_gz_write("hello.txt")); + CHECK(bu::check_file_path("hello.txt.gz")); + fs::remove("hello.txt.gz"); + } + + TEST_CASE("test command line for gz fq") { + CHECK_NOTHROW(bp::split_fq(input_gz)); + CHECK(bu::check_file_path("test.1.fq.gz")); + CHECK(bu::check_file_path("test.2.fq.gz")); + fs::remove("test.1.fq.gz"); + fs::remove("test.2.fq.gz"); + } + + TEST_CASE("test command line for txt fq") { + CHECK_NOTHROW(bp::split_fq(input)); + CHECK(bu::check_file_path("test.1.fq")); + CHECK(bu::check_file_path("test.2.fq")); + fs::remove("test.1.fq"); + fs::remove("test.2.fq"); + } } \ No newline at end of file