Skip to content

Commit

Permalink
finish boss-fpsp
Browse files Browse the repository at this point in the history
  • Loading branch information
yangli committed Sep 16, 2022
1 parent 3d088cc commit 1f7119f
Show file tree
Hide file tree
Showing 5 changed files with 116 additions and 60 deletions.
51 changes: 26 additions & 25 deletions include/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,19 @@ cmake_minimum_required(VERSION 3.14...3.22)

# Note: update this to your new project's name and version
project(
boss
VERSION 1.0.0
LANGUAGES CXX
boss
VERSION 1.0.0
LANGUAGES CXX
)

# ---- Include guards ----

if (PROJECT_SOURCE_DIR STREQUAL PROJECT_BINARY_DIR)
message(
FATAL_ERROR
"In-source builds not allowed. Please make a new directory (called a build directory) and run CMake from there."
)
endif ()
if(PROJECT_SOURCE_DIR STREQUAL PROJECT_BINARY_DIR)
message(
FATAL_ERROR
"In-source builds not allowed. Please make a new directory (called a build directory) and run CMake from there."
)
endif()

option(BOSS_HAS_BOOST "Do not download Boost" OFF)

Expand All @@ -34,12 +34,11 @@ include(../cmake/CPM.cmake)
CPMAddPackage("gh:TheLartians/[email protected]")

CPMAddPackage(
GITHUB_REPOSITORY ylab-hi/BINARY
VERSION 1.1.0
OPTIONS "BINARY_BUILD_STANDALONE NO" "BINARY_BUILD_TESTS NO"
GITHUB_REPOSITORY ylab-hi/BINARY
VERSION 1.1.0
OPTIONS "BINARY_BUILD_STANDALONE NO" "BINARY_BUILD_TESTS NO"
)


find_package(Boost REQUIRED COMPONENTS system iostreams)

# ---- Add source files ----
Expand All @@ -61,11 +60,13 @@ set_target_properties(${PROJECT_NAME} PROPERTIES CXX_STANDARD 20)
# Ask MSVC for standards-conforming C++ (/permissive-). The generator expression
# limits the flag to C++ sources compiled with MSVC; PUBLIC also forwards it to
# every target that links against ${PROJECT_NAME}.
target_compile_options(${PROJECT_NAME} PUBLIC "$<$<COMPILE_LANG_AND_ID:CXX,MSVC>:/permissive->")

# Link dependencies
target_link_libraries(${PROJECT_NAME} PUBLIC spdlog::spdlog Boost::boost Boost::iostreams binary::binary)
target_link_libraries(
${PROJECT_NAME} PUBLIC spdlog::spdlog Boost::boost Boost::iostreams binary::binary
)

target_include_directories(
${PROJECT_NAME} PUBLIC $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/../include>
$<INSTALL_INTERFACE:include/${PROJECT_NAME}-${PROJECT_VERSION}>
${PROJECT_NAME} PUBLIC $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/../include>
$<INSTALL_INTERFACE:include/${PROJECT_NAME}-${PROJECT_VERSION}>
)

# ---- Create an installable target ----
Expand All @@ -76,13 +77,13 @@ target_include_directories(
string(TOLOWER ${PROJECT_NAME}/version.h VERSION_HEADER_LOCATION)

# Generate the install rules and CMake package files for ${PROJECT_NAME} through
# PackageProject.cmake.
#
# INCLUDE_DESTINATION is resolved against the install prefix, so it must not
# start with "../" (that would place the headers outside the prefix). It also
# has to match the INSTALL_INTERFACE include directory declared on the target,
# i.e. include/${PROJECT_NAME}-${PROJECT_VERSION}.
packageProject(
  NAME ${PROJECT_NAME}
  VERSION ${PROJECT_VERSION}
  NAMESPACE ${PROJECT_NAME}
  BINARY_DIR ${PROJECT_BINARY_DIR}
  INCLUDE_DIR .
  INCLUDE_DESTINATION include/${PROJECT_NAME}-${PROJECT_VERSION}
  VERSION_HEADER "${VERSION_HEADER_LOCATION}"
  COMPATIBILITY SameMajorVersion
  DEPENDENCIES "spdlog 1.10.0"
)
8 changes: 5 additions & 3 deletions include/boss/fq_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ namespace boss::fqsp {
[[nodiscard]] bool is_skip(int c) {
if (c == '\n') {
++line_count_; // count lines 0-based index
if(saved_line_count_ == 0) return true;
if (saved_line_count_ == 0) return true;
}

if (FqDirection::Forward == direction_) {
Expand Down Expand Up @@ -70,13 +70,15 @@ namespace boss::fqsp {

void split_fq_impl_txt(const fs::path& input, FqDirection direction);

// Validates an input file name before splitting. Returns false (after logging
// an error) when binary::utils::check_file_path rejects the path or when the
// name does not end in ".fq" or ".fq.gz"; returns true otherwise.
[[nodiscard]] bool check_filename(std::string_view filename);

} // namespace details

//--------------Definitions of functions for splitting ncbi fq files----//

// Splits the .fq / .fq.gz file `input` into two output files, one for
// FqDirection::Forward (suffix "1") and one for FqDirection::Reverse
// (suffix "2"). A ".gz" extension selects the gzip implementation.
void split_fq(const fs::path& input);
void test_gz_read(const fs::path& path);
void test_gz_write(const fs::path& path);

using details::check_filename;

} // namespace boss::fqsp

Expand Down
62 changes: 35 additions & 27 deletions source/fq_utils.cpp
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
#include <binary/utils.hpp>
#include <boost/iostreams/copy.hpp>
#include <boost/iostreams/device/file.hpp>
#include <boost/iostreams/filter/gzip.hpp>
#include <boost/iostreams/filtering_stream.hpp>
#include <boost/iostreams/filtering_streambuf.hpp>
#include <boss/fq_utils.hpp>
#include <future>
#include <iostream>

namespace boss::fqsp {
Expand All @@ -14,7 +15,7 @@ namespace boss::fqsp {
io::filtering_istream in;
in.push(fq_filter(direction));
in.push(io::file_source(input.string(), std::ios_base::in));
std::ofstream out{fmt::format("{}{}{}", input.stem().string(),
std::ofstream out{fmt::format("{}.{}{}", input.stem().string(),
direction == FqDirection::Forward ? "1" : "2",
input.extension().string())};
io::copy(in, out);
Expand All @@ -24,10 +25,9 @@ namespace boss::fqsp {
void details::split_fq_impl_gz(const fs::path& input, FqDirection direction) {
spdlog::debug("Processing file: {} {}", input.string(),
direction == FqDirection::Forward ? "Forward" : "Reverse");
std::ofstream output_file{
fmt::format("{}{}{}", input.stem().string(), direction == FqDirection::Forward ? "1" : "2",
input.extension().string()),
std::ios_base::out | std::ios_base::binary};
std::ofstream output_file{fmt::format("{}.{}{}", input.stem().stem().string(),
direction == FqDirection::Forward ? "1" : "2", ".fq.gz"),
std::ios_base::out | std::ios_base::binary};

io::filtering_istream in;
in.push(fqsp::fq_filter(direction));
Expand All @@ -44,33 +44,41 @@ namespace boss::fqsp {

// Treats `input` as gzip-compressed purely on the basis of a ".gz" extension;
// the file contents are not inspected.
bool details::is_gzip(const fs::path& input) {
    const auto suffix = input.extension();
    return suffix == ".gz";
}

bool details::check_filename(std::string_view filename) {
if (!binary::utils::check_file_path(filename)) {
spdlog::error("File {} does not exist", filename);
return false;
}

auto path = fs::path(filename);

if (is_gzip(path)) {
if (!(path.stem().extension() == ".fq")) {
spdlog::error("File {} is not a valid .fq.gz file", filename);
return false;
}
return true;
}

if (!(path.extension() == ".fq")) {
spdlog::error("File {} is not a valid .fq file", filename);
return false;
}

return true;
}

// Splits `input` into its forward and reverse outputs.
//
// The forward pass runs on a worker thread while the reverse pass runs on the
// calling thread. A ".gz" extension selects the gzip implementation, anything
// else the plain-text one.
//
// Any exception thrown by either pass is propagated to the caller.
void split_fq(const fs::path& input) {
    auto* const split_impl
        = details::is_gzip(input) ? &details::split_fq_impl_gz : &details::split_fq_impl_txt;

    // std::launch::async guarantees a real worker thread; the default policy
    // is allowed to defer the task until the result is requested.
    std::future<void> forward
        = std::async(std::launch::async, split_impl, input, FqDirection::Forward);

    // Reverse direction on the calling thread. If this throws, the destructor
    // of `forward` still blocks until the worker has finished.
    split_impl(input, FqDirection::Reverse);

    // get() rather than wait(): wait() would silently drop an exception raised
    // by the forward pass.
    forward.get();
}

// Writes the gzip-compressed text "Hello World!\n" to the file "<path>.gz".
void test_gz_write(const fs::path& path) {
    // Open in binary mode: the compressor emits raw bytes, and a text-mode
    // stream may translate newline bytes on some platforms and corrupt the
    // archive.
    std::ofstream out{fmt::format("{}{}", path.string(), ".gz"),
                      std::ios_base::out | std::ios_base::binary};

    io::filtering_ostream fis;
    fis.push(io::gzip_compressor());
    fis.push(out);

    fis << "Hello World!\n";
    // Close the filter chain first so the gzip trailer is flushed into `out`.
    io::close(fis);
    out.close();
}

// Decompresses the gzip file at `path` and copies the result to std::cout.
void test_gz_read(const fs::path& path) {
    const auto open_mode = std::ios_base::in | std::ios_base::binary;
    std::ifstream compressed(path.string(), open_mode);

    // Chain: gzip decompressor reading from the raw file stream.
    io::filtering_streambuf<io::input> chain;
    chain.push(io::gzip_decompressor());
    chain.push(compressed);

    io::copy(chain, std::cout);
}
} // namespace boss::fqsp
4 changes: 1 addition & 3 deletions standalone/fqsp/source/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,9 @@ auto main(int argc, char** argv) -> int {

try {
auto input = result["input"].as<std::string>();
if (!binary::utils::check_file_path(input)) {
spdlog::error("Input file does not exist: {}", input);
if (!boss::fqsp::check_filename(input)) {
std::exit(1);
}

boss::fqsp::split_fq(fs::path(input));

} catch (const cxxopts::OptionException& e) {
Expand Down
51 changes: 49 additions & 2 deletions test/source/test_fqsp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,12 @@
#include <spdlog/spdlog.h>

#include <binary/utils.hpp>
#include <boost/iostreams/copy.hpp>
#include <boost/iostreams/device/file.hpp>
#include <boost/iostreams/filter/gzip.hpp>
#include <boost/iostreams/filtering_stream.hpp>
#include <boss/fq_utils.hpp>
#include <iostream>

TEST_SUITE("fqsp") {
namespace bp = boss::fqsp;
Expand All @@ -11,12 +16,54 @@ TEST_SUITE("fqsp") {
constexpr std::string_view input_gz = "../../test/data/test.fq.gz";
constexpr std::string_view input = "../../test/data/test.fq";

// Test helper: writes the gzip-compressed text "Hello World!\n" to "<path>.gz".
void test_gz_write(const fs::path& path) {
    namespace io = boost::iostreams;
    // Open in binary mode: the compressor emits raw bytes, and a text-mode
    // stream may translate newline bytes on some platforms and corrupt the
    // archive.
    std::ofstream out{fmt::format("{}{}", path.string(), ".gz"),
                      std::ios_base::out | std::ios_base::binary};

    io::filtering_ostream fis;
    fis.push(io::gzip_compressor());
    fis.push(out);

    fis << "Hello World!\n";
    // Close the filter chain first so the gzip trailer is flushed into `out`.
    io::close(fis);
    out.close();
}

// Test helper: decompresses the gzip file at `path` and copies it to std::cout.
void test_gz_read(const fs::path& path) {
    namespace io = boost::iostreams;

    const auto open_mode = std::ios_base::in | std::ios_base::binary;
    std::ifstream compressed(path.string(), open_mode);

    // Chain: gzip decompressor reading from the raw file stream.
    io::filtering_streambuf<io::input> chain;
    chain.push(io::gzip_decompressor());
    chain.push(compressed);

    io::copy(chain, std::cout);
}

// Checks the two fixture paths and the extension-based gzip detection.
TEST_CASE("is_gzip") {
// Log the working directory: input_gz and input are relative paths.
spdlog::info("current path: {}", fs::current_path().string());
// Presumably verifies that both fixtures exist - confirm against binary/utils.hpp.
CHECK(bu::check_file_path({input_gz, input}));
// Only the ".gz" fixture should be reported as gzip.
CHECK(bp::details::is_gzip(input_gz));
CHECK_FALSE(bp::details::is_gzip(input));
}

TEST_CASE("read_fq") { CHECK_NOTHROW(bp::test_gz_read(input_gz)); }
TEST_CASE("read_fq") { CHECK_NOTHROW(test_gz_read(input_gz)); }
// Checks that test_gz_write runs without throwing and leaves "hello.txt.gz" behind.
TEST_CASE("write_fq") {
CHECK_NOTHROW(test_gz_write("hello.txt"));
// test_gz_write appends ".gz" to the path it is given.
CHECK(bu::check_file_path("hello.txt.gz"));
// Remove the generated file so the working directory stays clean.
fs::remove("hello.txt.gz");
}

// Splits the gzip fixture and expects one output per direction in the
// current working directory.
TEST_CASE("test command line for gz fq") {
CHECK_NOTHROW(bp::split_fq(input_gz));
// "1" is the forward output, "2" the reverse output.
CHECK(bu::check_file_path("test.1.fq.gz"));
CHECK(bu::check_file_path("test.2.fq.gz"));
// Remove the generated files so the working directory stays clean.
fs::remove("test.1.fq.gz");
fs::remove("test.2.fq.gz");
}

// Splits the plain-text fixture and expects one output per direction in the
// current working directory.
TEST_CASE("test command line for txt fq") {
CHECK_NOTHROW(bp::split_fq(input));
// "1" is the forward output, "2" the reverse output.
CHECK(bu::check_file_path("test.1.fq"));
CHECK(bu::check_file_path("test.2.fq"));
// Remove the generated files so the working directory stays clean.
fs::remove("test.1.fq");
fs::remove("test.2.fq");
}
}

0 comments on commit 1f7119f

Please sign in to comment.