Skip to content

Commit

Permalink
Merge pull request #31 from PROBIC/refactor-input-reading
Browse files Browse the repository at this point in the history
Refactor input reading
  • Loading branch information
tmaklin authored Aug 20, 2024
2 parents c537327 + 1ae8a17 commit 2e1f2f3
Show file tree
Hide file tree
Showing 8 changed files with 356 additions and 86 deletions.
51 changes: 12 additions & 39 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -232,45 +232,18 @@ else()
set_target_properties(alignment-writer PROPERTIES EXCLUDE_FROM_ALL 1)
set(CMAKE_ALIGNMENT_WRITER_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/external/alignment-writer/include)
set(CMAKE_ALIGNMENT_WRITER_LIBRARY ${CMAKE_CURRENT_BINARY_DIR}/lib/libalignment-writer.a)
set(CMAKE_BITMAGIC_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/external/alignment-writer/external/BitMagic-7.12.3/src/)
endif()
include_directories(${CMAKE_ALIGNMENT_WRITER_HEADERS})
target_link_libraries(mSWEEP ${CMAKE_ALIGNMENT_WRITER_LIBRARY})

## telescope
if (DEFINED CMAKE_TELESCOPE_LIBRARY AND DEFINED CMAKE_TELESCOPE_HEADERS)
message(STATUS "telescope headers provided in: ${CMAKE_TELESCOPE_HEADERS}")
message(STATUS "telescope library provided in: ${CMAKE_TELESCOPE_LIBRARY}")
## BitMagic
if (DEFINED CMAKE_BITMAGIC_HEADERS)
message(STATUS "BitMagic headers provided in: ${CMAKE_BITMAGIC_HEADERS}")
else()
FetchContent_Declare(telescope
GIT_REPOSITORY https://github.com/tmaklin/telescope.git
GIT_TAG v0.7.3
PREFIX "external"
SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/external/telescope"
BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/external/telescope"
BUILD_IN_SOURCE 0
CMAKE_ARGS -D CMAKE_BXZSTR_HEADERS=${CMAKE_BXZSTR_HEADERS}
-D CMAKE_CXXARGS_HEADERS=${CMAKE_CXXARGS_HEADERS}
-D CMAKE_CXXIO_HEADERS=${CMAKE_CXXIO_HEADERS}
-D CMAKE_ALIGNMENT_WRITER_HEADERS=${CMAKE_ALIGNMENT_WRITER_HEADERS}
-D CMAKE_ALIGNMENT_WRITER_LIBRARY=${CMAKE_ALIGNMENT_WRITER_LIBRARY}
-D CMAKE_BITMAGIC_HEADERS=${CMAKE_CURRENT_SOURCE_DIR}/external/telescope/external/BitMagic-7.12.3/src
-D CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
-D "CMAKE_C_FLAGS=${CMAKE_C_FLAGS}"
-D "CMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}"
-D "CMAKE_C_COMPILER=${CMAKE_C_COMPILER}"
-D "CMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}"
INSTALL_COMMAND ""
)
FetchContent_MakeAvailable(telescope)
add_dependencies(telescope libalignmentwriter)
add_dependencies(mSWEEP telescope)
set_target_properties(telescope PROPERTIES EXCLUDE_FROM_ALL 1)
set(CMAKE_TELESCOPE_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/external/telescope/include)
set(CMAKE_TELESCOPE_LIBRARY ${CMAKE_CURRENT_BINARY_DIR}/lib/libtelescope.a)
set(CMAKE_BITMAGIC_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/external/telescope/external/BitMagic-7.12.3/src)
message(FATAL_ERROR "Provide BitMagic C++ headers with -DCMAKE_BITMAGIC_HEADERS")
endif()
include_directories(${CMAKE_TELESCOPE_HEADERS} ${CMAKE_BITMAGIC_HEADERS})
target_link_libraries(mSWEEP ${CMAKE_TELESCOPE_LIBRARY})
include_directories(${CMAKE_BITMAGIC_HEADERS})

## seamat
if (DEFINED CMAKE_SEAMAT_HEADERS)
Expand All @@ -283,6 +256,7 @@ else()
SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/external/seamat"
BUILD_IN_SOURCE 0
CMAKE_ARGS -D CMAKE_BUILD_TESTS=0
-D CMAKE_BITMAGIC_HEADERS=${CMAKE_BITMAGIC_HEADERS}
BUILD_COMMAND ""
CONFIGURE_COMMAND ""
INSTALL_COMMAND ""
Expand Down Expand Up @@ -332,7 +306,7 @@ if (DEFINED CMAKE_MGEMS_LIBRARY AND DEFINED CMAKE_MGEMS_HEADERS)
else()
FetchContent_Declare(mGEMS
GIT_REPOSITORY https://github.com/PROBIC/mGEMS.git
GIT_TAG v1.3.0
GIT_TAG v1.3.3
PREFIX "external"
SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/external/mGEMS"
BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/external/mGEMS"
Expand All @@ -341,9 +315,8 @@ else()
-D CMAKE_CXXARGS_HEADERS=${CMAKE_CXXARGS_HEADERS}
-D CMAKE_CXXIO_HEADERS=${CMAKE_CXXIO_HEADERS}
-D CMAKE_ALIGNMENT_WRITER_HEADERS=${CMAKE_ALIGNMENT_WRITER_HEADERS}
-D CMAKE_BITMAGIC_HEADERS=${CMAKE_CURRENT_BINARY_DIR}/external/telescope/external/BitMagic-7.12.3/src
-D CMAKE_BITMAGIC_HEADERS=${CMAKE_BITMAGIC_HEADERS}
-D CMAKE_SEAMAT_HEADERS=${CMAKE_SEAMAT_HEADERS}
-D CMAKE_TELESCOPE_HEADERS=${CMAKE_TELESCOPE_HEADERS}
-D CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
-D "CMAKE_C_FLAGS=${CMAKE_C_FLAGS}"
-D "CMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}"
Expand All @@ -352,13 +325,13 @@ else()
INSTALL_COMMAND ""
)
FetchContent_MakeAvailable(mGEMS)
add_dependencies(mGEMS telescope libalignmentwriter)
add_dependencies(mGEMS libalignmentwriter)
add_dependencies(mSWEEP libmgems)
set_target_properties(mGEMS PROPERTIES EXCLUDE_FROM_ALL 1)
set(CMAKE_MGEMS_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/external/mGEMS/include)
set(CMAKE_MGEMS_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/external/mGEMS/include ${CMAKE_CURRENT_BINARY_DIR}/external/mGEMS/include)
set(CMAKE_MGEMS_LIBRARY ${CMAKE_CURRENT_BINARY_DIR}/external/mGEMS/lib/libmgems.a)
endif()
target_link_libraries(mSWEEP ${CMAKE_MGEMS_LIBRARY} ${CMAKE_TELESCOPE_LIBRARY} ${CMAKE_ALIGNMENT_WRITER_LIBRARY})
target_link_libraries(mSWEEP ${CMAKE_MGEMS_LIBRARY} ${CMAKE_ALIGNMENT_WRITER_LIBRARY})
include_directories(${CMAKE_MGEMS_HEADERS})

include_directories(
Expand Down
73 changes: 56 additions & 17 deletions include/Likelihood.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
#define MSWEEP_LIKELIHOOD_HPP

#include "Matrix.hpp"
#include "telescope.hpp"

#include "mSWEEP_openmp_config.hpp"

Expand All @@ -41,6 +40,7 @@
#include <algorithm>
#include <memory>

#include "mSWEEP_alignment.hpp"
#include "Grouping.hpp"

namespace mSWEEP {
Expand Down Expand Up @@ -106,52 +106,91 @@ class LL_WOR21 : public Likelihood<T> {
return ll_mat;
}

void fill_ll_mat(const telescope::Alignment &alignment, const std::vector<V> &group_sizes, const size_t n_groups, const size_t min_hits) {
void fill_ll_mat(const mSWEEP::Alignment &alignment, const std::vector<V> &group_sizes, const size_t n_groups, const size_t min_hits) {
size_t num_ecs = alignment.n_ecs();
size_t n_targets = alignment.get_n_targets();

size_t n_threads = 1;
#if defined(MSWEEP_OPENMP_SUPPORT) && (MSWEEP_OPENMP_SUPPORT) == 1
#pragma omp parallel
{
n_threads = omp_get_num_threads();
}
#endif

// This double loop is currently the slowest part in the input reading
std::vector<bm::sparse_vector<V, bm::bvector<>>> local_counts(n_threads);
#pragma omp parallel for schedule(static)
for (size_t i = 0; i < num_ecs; ++i) {
for (size_t j = 0; j < n_targets; ++j) {
if (alignment(i, j)) {
#if defined(MSWEEP_OPENMP_SUPPORT) && (MSWEEP_OPENMP_SUPPORT) == 1
local_counts[omp_get_thread_num()].inc((size_t)((size_t)alignment.get_groups()[j]*num_ecs) + i);
#else
local_counts[0].inc((size_t)((size_t)alignment.get_groups()[j]*num_ecs) + i);
#endif
}
}
}

bm::sparse_vector<V, bm::bvector<>> group_counts = std::move(local_counts[0]);
for (size_t i = 1; i < n_threads; ++i) {
group_counts.merge(local_counts[i]);
}

bool mask_groups = min_hits > 0;
this->groups_mask = std::vector<bool>(n_groups, !mask_groups);
std::vector<V> masked_group_sizes;
std::vector<size_t> groups_pos(n_groups, 0);
size_t n_masked_groups = 0;
if (mask_groups) {
std::vector<size_t> group_hit_counts(n_groups, (size_t)0);
// Create mask identifying groups that have at least 1 alignment
for (size_t i = 0; i < num_ecs; ++i) {
for (size_t j = 0; j < n_groups; ++j) {
group_hit_counts[j] += (alignment(j, i) > 0) * alignment.reads_in_ec(i);
#pragma omp parallel for schedule(static) reduction(vec_size_t_plus:group_hit_counts)
for (size_t j = 0; j < n_groups; ++j) {
for (size_t i = 0; i < num_ecs; ++i) {
group_hit_counts[j] += (group_counts[j*num_ecs + i] > 0) * alignment.reads_in_ec(i);
}
}

for (size_t i = 0; i < n_groups; ++i) {
this->groups_mask[i] = groups_mask[i] || (group_hit_counts[i] >= min_hits);
if (this->groups_mask[i]) {
groups_pos[i] = n_masked_groups;
masked_group_sizes.push_back(group_sizes[i]);
++n_masked_groups;
}
}
} else {
masked_group_sizes = group_sizes;
#pragma omp parallel for schedule(static)
for (size_t i = 0; i < n_groups; ++i) {
groups_pos[i] = i;
}
}
size_t n_masked_groups = masked_group_sizes.size();
n_masked_groups = masked_group_sizes.size();

this->update_bb_parameters(masked_group_sizes, n_masked_groups, this->bb_constants);
const seamat::DenseMatrix<T> &precalc_lls_mat = this->precalc_lls(masked_group_sizes, n_masked_groups);

this->log_likelihoods.resize(n_masked_groups, num_ecs, std::log(this->zero_inflation));
for (size_t j = 0; j < num_ecs; ++j) {
size_t groups_pos = 0;
for (size_t i = 0; i < n_groups; ++i) {

#pragma omp parallel for schedule(static)
for (size_t i = 0; i < n_groups; ++i) {
if (this->groups_mask[i]) {
this->log_likelihoods(groups_pos, j) = precalc_lls_mat(groups_pos, alignment(i, j));
++groups_pos;
for (size_t j = 0; j < num_ecs; ++j) {
this->log_likelihoods(groups_pos[i], j) = precalc_lls_mat(groups_pos[i], group_counts[i*num_ecs + j]);
}
}
}
}
}

void fill_ec_counts(const telescope::Alignment &alignment) {
void fill_ec_counts(const mSWEEP::Alignment &alignment) {
// Fill log ec counts.
this->log_ec_counts.resize(alignment.n_ecs(), 0);
#pragma omp parallel for schedule(static)
for (size_t i = 0; i < alignment.n_ecs(); ++i) {
this->log_ec_counts[i] = std::log(alignment.reads_in_ec(i));
this->log_ec_counts[i] = std::log(alignment.reads_in_ec(i));
}
}

Expand All @@ -170,14 +209,14 @@ class LL_WOR21 : public Likelihood<T> {
public:
LL_WOR21() = default;

LL_WOR21(const std::vector<V> &group_sizes, const telescope::Alignment &alignment, const size_t n_groups, const T tol, const T frac_mu, const size_t min_hits, const T _zero_inflation) {
LL_WOR21(const std::vector<V> &group_sizes, const mSWEEP::Alignment &alignment, const size_t n_groups, const T tol, const T frac_mu, const size_t min_hits, const T _zero_inflation) {
this->bb_constants[0] = tol;
this->bb_constants[1] = frac_mu;
this->zero_inflation = _zero_inflation;
this->from_grouped_alignment(alignment, group_sizes, n_groups, min_hits);
}

void from_grouped_alignment(const telescope::Alignment &alignment, const std::vector<V> &group_sizes, const size_t n_groups, const size_t min_hits) {
void from_grouped_alignment(const mSWEEP::Alignment &alignment, const std::vector<V> &group_sizes, const size_t n_groups, const size_t min_hits) {
this->fill_ll_mat(alignment, group_sizes, n_groups, min_hits);
this->fill_ec_counts(alignment);
}
Expand Down Expand Up @@ -292,7 +331,7 @@ class LL_WOR21 : public Likelihood<T> {
const std::vector<bool>& groups_considered() const override { return this->groups_mask; };
};
template <typename T>
std::unique_ptr<Likelihood<T>> ConstructAdaptiveLikelihood(const telescope::Alignment &alignment, const Grouping &grouping, const T q, const T e, const size_t min_hits, const T zero_inflation) {
std::unique_ptr<Likelihood<T>> ConstructAdaptiveLikelihood(const mSWEEP::Alignment &alignment, const Grouping &grouping, const T q, const T e, const size_t min_hits, const T zero_inflation) {
size_t max_group_size = grouping.max_group_size();
size_t n_groups = grouping.get_n_groups();
std::unique_ptr<Likelihood<T>> log_likelihoods;
Expand Down
22 changes: 11 additions & 11 deletions include/Sample.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
#include <memory>

#include "Matrix.hpp"
#include "telescope.hpp"
#include "mSWEEP_alignment.hpp"

namespace mSWEEP {
class Sample {
Expand All @@ -47,7 +47,7 @@ class Sample {
std::vector<double> log_KLDs;

protected:
void count_alignments(const telescope::Alignment &alignment);
void count_alignments(const mSWEEP::Alignment &alignment);

public:
// Virtual functions
Expand Down Expand Up @@ -111,7 +111,7 @@ class PlainSample : public Sample {
public:
PlainSample() = default;

PlainSample(const telescope::Alignment &alignment) {
PlainSample(const mSWEEP::Alignment &alignment) {
this->count_alignments(alignment);
}

Expand All @@ -132,7 +132,7 @@ class BinningSample : public PlainSample, public Binning {
public:
BinningSample() = default;

BinningSample(const telescope::Alignment &alignment) {
BinningSample(const mSWEEP::Alignment &alignment) {
this->count_alignments(alignment);
this->store_aligned_reads(alignment.get_aligned_reads());
}
Expand All @@ -157,19 +157,19 @@ class BootstrapSample : public Sample {
std::vector<std::vector<double>> bootstrap_results;

// Set all variables required to bootstrap the ec_counts later
void init_bootstrap(const telescope::Alignment &alignment);
void init_bootstrap(const mSWEEP::Alignment &alignment);

protected:
void construct(const telescope::Alignment &alignment, const size_t _iters, const int32_t seed, const size_t bootstrap_count=0);
void construct(const mSWEEP::Alignment &alignment, const size_t _iters, const int32_t seed, const size_t bootstrap_count=0);

public:
BootstrapSample() = default;

// Set seed in constructor
BootstrapSample(const telescope::Alignment &alignment, const size_t _iters, const int32_t seed) {
BootstrapSample(const mSWEEP::Alignment &alignment, const size_t _iters, const int32_t seed) {
this->construct(alignment, _iters, seed);
}
BootstrapSample(const telescope::Alignment &alignment, const size_t _iters, const size_t _bootstrap_count, const int32_t seed) {
BootstrapSample(const mSWEEP::Alignment &alignment, const size_t _iters, const size_t _bootstrap_count, const int32_t seed) {
this->construct(alignment, _iters, seed, _bootstrap_count);
}

Expand All @@ -191,18 +191,18 @@ class BootstrapSample : public Sample {

class BinningBootstrap : public BootstrapSample, public Binning {
public:
BinningBootstrap(const telescope::Alignment &alignment, const size_t _iters, const int32_t seed) {
BinningBootstrap(const mSWEEP::Alignment &alignment, const size_t _iters, const int32_t seed) {
this->construct(alignment, _iters, seed);
this->store_aligned_reads(alignment.get_aligned_reads());
}
BinningBootstrap(const telescope::Alignment &alignment, const size_t _iters, const size_t _bootstrap_count, const int32_t seed) {
BinningBootstrap(const mSWEEP::Alignment &alignment, const size_t _iters, const size_t _bootstrap_count, const int32_t seed) {
this->construct(alignment, _iters, seed, _bootstrap_count);
this->store_aligned_reads(alignment.get_aligned_reads());
}

};

void ConstructSample(const telescope::Alignment &alignment, const size_t bootstrap_iters, const size_t bootstrap_count, const size_t bootstrap_seed, const bool bin_reads, std::unique_ptr<Sample> &sample);
void ConstructSample(const mSWEEP::Alignment &alignment, const size_t bootstrap_iters, const size_t bootstrap_count, const size_t bootstrap_seed, const bool bin_reads, std::unique_ptr<Sample> &sample);

}

Expand Down
Loading

0 comments on commit 2e1f2f3

Please sign in to comment.