diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index ba64f3d..d610d8c 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -5,9 +5,11 @@ on: branches: - main - release* + - master pull_request: branches: - main + - master - release* env: diff --git a/README.md b/README.md index f78e3f8..61baaa8 100644 --- a/README.md +++ b/README.md @@ -5,13 +5,13 @@ [![License](https://img.shields.io/github/license/ylab-hi/BINARY)](https://github.com/cauliyang/boss/blob/main/LICENSE) ![compiler](https://img.shields.io/badge/Compiler-GCC10%20%7C%20GCC11%20%7C%20GCC12-green) -# BioinfOrmaticS toolboxeS aka BOSS +# **B**ioinf**O**rmatic**S** toolboxe**S** aka **BOSS** ## Current Tools | Tool | Description | Status | |-------------------------|-------------------------------------|----------------------------------------------------------------| -| [boss-fqsp](#boss-fqsp) | Fastq Splitter for Paried End Reads | ![boss-fqsp](https://img.shields.io/badge/Version-1.0.0-green) | +| [boss-fqsp](#boss-fqsp) | Fastq Splitter for Paired End Reads | ![boss-fqsp](https://img.shields.io/badge/Version-1.0.0-green) | ## Usage diff --git a/include/CMakeLists.txt b/include/CMakeLists.txt index 534c100..264bfd8 100644 --- a/include/CMakeLists.txt +++ b/include/CMakeLists.txt @@ -18,9 +18,12 @@ if(PROJECT_SOURCE_DIR STREQUAL PROJECT_BINARY_DIR) ) endif() +option(BOSS_HAS_BOOST "Do not download Boost" OFF) + message(STATUS "Started CMake for ${PROJECT_NAME} v${PROJECT_VERSION}...") message(STATUS "Compiler name: ${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}") message(STATUS "Cmake Binary path: ${CMAKE_BINARY_DIR}") +message(STATUS "BOSS has boost: ${BOSS_HAS_BOOST}") # ---- Add dependencies via CPM ---- # see https://github.com/TheLartians/CPM.cmake for more info
@@ -30,14 +33,10 @@ include(../cmake/CPM.cmake)
 # PackageProject.cmake will be used to make our target installable
 CPMAddPackage("gh:TheLartians/PackageProject.cmake@1.8.0")
 
-CPMAddPackage(
-  NAME spdlog
-  GITHUB_REPOSITORY gabime/spdlog
-  VERSION 1.10.0
-  OPTIONS "SPDLOG_INSTALL YES"
-)
-
-find_package(Boost REQUIRED)
+# The gzip filters used by fq_utils.hpp live in the compiled Boost.Iostreams library, so it is
+# requested as a component; this also defines the Boost::iostreams imported target linked below.
+find_package(Boost REQUIRED COMPONENTS iostreams)
+
 # ---- Add source files ----
 
 # Note: globbing sources is considered bad practice as CMake's generators may not detect new files
@@ -56,7 +55,7 @@ set_target_properties(${PROJECT_NAME} PROPERTIES CXX_STANDARD 20)
 target_compile_options(${PROJECT_NAME} PUBLIC "$<$:/permissive->")
 
 # Link dependencies
-target_link_libraries(${PROJECT_NAME} PUBLIC spdlog::spdlog Boost::boost)
+target_link_libraries(${PROJECT_NAME} PUBLIC spdlog::spdlog Boost::boost Boost::iostreams)
 
 target_include_directories(
   ${PROJECT_NAME} PUBLIC $
diff --git a/include/boss/fq_utils.hpp b/include/boss/fq_utils.hpp index 0c077dd..9f58015 100644 --- a/include/boss/fq_utils.hpp +++ b/include/boss/fq_utils.hpp @@ -1,13 +1,22 @@ #ifndef FQ_UTILS_H #define FQ_UTILS_H +#include + #include // input_filter_tag +#include +#include +#include +#include #include // get, put, WOULD_BLOCK #include // EOF.
#include -namespace boss { +namespace boss::fqsp { + namespace fs = std::filesystem; + enum class FqDirection; + class fq_filter; enum class FqDirection { Forward, Reverse };
@@ -62,10 +71,96 @@ namespace boss {
     int saved_line_count_{0};
   };
 
-  //--------------Definitions of helper functions for splitting ncbi fq files----//
+  namespace details {
+
+    // Tags selecting a split_fq_impl overload. gzip_tag and bzip2_tag derive from txt_tag, so a
+    // format without an overload of its own resolves to the plain-text implementation.
+    struct txt_tag {};
+    struct gzip_tag : txt_tag {};
+    struct bzip2_tag : txt_tag {};
+
+    // Input format, decided at run time from the file name.
+    enum class fq_format { txt, gzip, bzip2 };
+
+    // Classifies `path` by suffix: ".gz" is gzip, ".bz2" is bzip2, anything else is txt.
+    //
+    // This returns an enumerator instead of a tag object on purpose. A function returning
+    // `txt_tag` by value slices `gzip_tag{}` down to `txt_tag`, so overload resolution on the
+    // result can only ever select the plain-text overload and gzip input is never inflated.
+    [[nodiscard]] inline fq_format format_category(std::string_view path) {
+      if (path.ends_with(".gz")) {
+        return fq_format::gzip;
+      }
+      if (path.ends_with(".bz2")) {
+        return fq_format::bzip2;
+      }
+      return fq_format::txt;
+    }
+
+    //----------------------------Define helper functions---------------------------//
+
+    // Reads the gzip-compressed `input`, passes the inflated text through fq_filter(direction)
+    // and writes the result to "<stem><1|2><extension>" in the current working directory.
+    // `inline` so that including this header from several translation units is not an ODR error.
+    // NOTE(review): the output keeps the input's ".gz" extension but is written uncompressed;
+    // confirm whether it should be re-compressed or renamed.
+    inline void split_fq_impl(const fs::path& input, FqDirection direction, gzip_tag) {
+      // Filters are pushed reader-side first: the device added last feeds the decompressor,
+      // whose output feeds fq_filter.
+      boost::iostreams::filtering_istream in;
+      in.push(fq_filter(direction));
+      in.push(boost::iostreams::gzip_decompressor());
+      in.push(boost::iostreams::file_source(input.string(),
+                                            std::ios_base::in | std::ios_base::binary));
+
+      std::ofstream out{fmt::format("{}{}{}", input.stem().string(),
+                                    direction == FqDirection::Forward ? "1" : "2",
+                                    input.extension().string())};
+      boost::iostreams::copy(in, out);
+      out.close();
+    }
+
+    // Passes the uncompressed `input` through fq_filter(direction) and writes the result to
+    // "<stem><1|2><extension>" in the current working directory.
+    inline void split_fq_impl(const fs::path& input, FqDirection direction, txt_tag) {
+      boost::iostreams::filtering_istream in;
+      in.push(fq_filter(direction));
+      in.push(boost::iostreams::file_source(input.string(), std::ios_base::in));
+      std::ofstream out{fmt::format("{}{}{}", input.stem().string(),
+                                    direction == FqDirection::Forward ? "1" : "2",
+                                    input.extension().string())};
+      boost::iostreams::copy(in, out);
+      out.close();
+    }
+
+    // Runs the split_fq_impl overload matching the format of `input` for one direction.
+    inline void split_fq_dispatch(const fs::path& input, FqDirection direction) {
+      switch (format_category(input.string())) {
+        case fq_format::gzip:
+          split_fq_impl(input, direction, gzip_tag{});
+          break;
+        case fq_format::bzip2:
+          // No bzip2 overload exists, so this resolves to the txt_tag one and the data is
+          // read as-is. NOTE(review): add a bzip2_decompressor overload or reject ".bz2".
+          split_fq_impl(input, direction, bzip2_tag{});
+          break;
+        case fq_format::txt:
+          split_fq_impl(input, direction, txt_tag{});
+          break;
+      }
+    }
+
+  } // namespace details
+
+  //--------------Definitions of functions for splitting ncbi fq files----//
 
-  [[nodiscard]] bool is_gzip(std::string_view path);
+  // Splits `input` into two files, one produced with FqDirection::Forward (suffix "1") and one
+  // with FqDirection::Reverse (suffix "2"); ".gz" input is decompressed while reading.
+  inline void split_fq(const fs::path& input) {
+    details::split_fq_dispatch(input, FqDirection::Forward);
+    details::split_fq_dispatch(input, FqDirection::Reverse);
+  }
 
-} // namespace boss
+} // namespace boss::fqsp
 
 #endif // FQ_UTILS_H
\ No newline at end of file
diff --git a/source/fq_utils.cpp b/source/fq_utils.cpp index e065f13..c398eaa 100644 --- a/source/fq_utils.cpp +++ b/source/fq_utils.cpp @@ -1,5 +1,4 @@ + #include -namespace boss { - bool is_gzip(std::string_view path) { return path.ends_with(".gz"); } -} // namespace boss +namespace boss::fqsp {} // namespace boss::fqsp diff --git a/standalone/CMakeLists.txt b/standalone/CMakeLists.txt index a813607..117d2f8 100644 --- a/standalone/CMakeLists.txt +++ b/standalone/CMakeLists.txt @@ -11,40 +11,37 @@ include(../cmake/tools.cmake) include(../cmake/CPM.cmake) CPMAddPackage( - GITHUB_REPOSITORY ylab-hi/BINARY - VERSION 1.1.0 - OPTIONS "BINARY_BUILD_STANDALONE NO" "BINARY_BUILD_TESTS NO" + GITHUB_REPOSITORY ylab-hi/BINARY + VERSION 1.1.0 + OPTIONS "BINARY_BUILD_STANDALONE NO" "BINARY_BUILD_TESTS NO" ) - CPMAddPackage( - GITHUB_REPOSITORY jarro2783/cxxopts - VERSION 3.0.0 - OPTIONS "CXXOPTS_BUILD_EXAMPLES NO" "CXXOPTS_BUILD_TESTS NO" "CXXOPTS_ENABLE_INSTALL YES" + GITHUB_REPOSITORY jarro2783/cxxopts + VERSION 3.0.0 + OPTIONS "CXXOPTS_BUILD_EXAMPLES NO" "CXXOPTS_BUILD_TESTS NO" "CXXOPTS_ENABLE_INSTALL YES" ) - CPMAddPackage(NAME BOSS SOURCE_DIR ${CMAKE_CURRENT_LIST_DIR}/../include) # ---- Create standalone executable ---- - function(add_standalone targetName) - message(STATUS "Adding standalone ${targetName}") - file(GLOB ${targetName}_source CONFIGURE_DEPENDS -
${CMAKE_CURRENT_LIST_DIR}/${targetName}/source/*.cpp - ) - add_executable(${targetName} ${${targetName}_source}) - target_compile_features(${targetName} PRIVATE cxx_std_20) - target_link_libraries(${targetName} PRIVATE boss::boss binary cxxopts stdc++fs) - target_include_directories( - ${targetName} PRIVATE ${CMAKE_CURRENT_LIST_DIR}/include - ${CMAKE_CURRENT_LIST_DIR}/${targetName}/include - ) - target_compile_options(${targetName} PRIVATE -Wall -Wextra -Wnon-virtual-dtor -pedantic -Werror) - set_target_properties(${targetName} PROPERTIES CXX_EXTENSIONS OFF OUTPUT_NAME boss-${targetName}) + message(STATUS "Adding standalone ${targetName}") + file(GLOB ${targetName}_source CONFIGURE_DEPENDS + ${CMAKE_CURRENT_LIST_DIR}/${targetName}/source/*.cpp + ) + add_executable(${targetName} ${${targetName}_source}) + target_compile_features(${targetName} PRIVATE cxx_std_20) + target_link_libraries(${targetName} PRIVATE boss::boss binary cxxopts stdc++fs) + target_include_directories( + ${targetName} PRIVATE ${CMAKE_CURRENT_LIST_DIR}/include + ${CMAKE_CURRENT_LIST_DIR}/${targetName}/include + ) + target_compile_options(${targetName} PRIVATE -Wall -Wextra -Wnon-virtual-dtor -pedantic -Werror) + set_target_properties(${targetName} PROPERTIES CXX_EXTENSIONS OFF OUTPUT_NAME boss-${targetName}) endfunction() -#file(GLOB sources CONFIGURE_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/source/*.cpp) +# file(GLOB sources CONFIGURE_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/source/*.cpp) -add_standalone(fqsp) \ No newline at end of file +add_standalone(fqsp) diff --git a/standalone/fqsp/source/main.cpp b/standalone/fqsp/source/main.cpp index 97acc44..2f03866 100644 --- a/standalone/fqsp/source/main.cpp +++ b/standalone/fqsp/source/main.cpp @@ -46,8 +46,10 @@ auto main(int argc, char** argv) -> int { std::exit(1); } + boss::fqsp::split_fq(fs::path(input)); + } catch (const cxxopts::OptionException& e) { - spdlog::error("{}", e.what()); + spdlog::error("{}", e.what()); std::exit(1); }