Skip to content

Commit

Permalink
Solves multiprocessing pandora map --discover issue with GATB graph creation (closes #195)
Browse files Browse the repository at this point in the history
  • Loading branch information
leandro committed Nov 15, 2019
1 parent bd4dbee commit 951717a
Show file tree
Hide file tree
Showing 7 changed files with 66 additions and 64 deletions.
2 changes: 1 addition & 1 deletion include/denovo_discovery/denovo_discovery.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ class DenovoDiscovery {
DenovoDiscovery(const uint_least8_t& kmer_size, const double& read_error_rate,
const uint8_t max_insertion_size = 15);

void find_paths_through_candidate_region(CandidateRegion& candidate_region);
void find_paths_through_candidate_region(CandidateRegion& candidate_region, const fs::path &denovo_output_directory);

double calculate_kmer_coverage(
const uint32_t& read_covg, const uint32_t& ref_length) const;
Expand Down
2 changes: 1 addition & 1 deletion include/denovo_discovery/local_assembly.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ bool string_ends_with(std::string const& query, std::string const& ending);

std::string reverse_complement(const std::string& forward);

void remove_graph_file();
void remove_graph_file (const fs::path &prefix);

std::vector<std::string> generate_start_kmers(
const std::string& sequence, const uint16_t& k, uint32_t num_to_generate);
Expand Down
16 changes: 9 additions & 7 deletions src/denovo_discovery/denovo_discovery.cpp
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
#include "denovo_discovery/denovo_discovery.h"

void DenovoDiscovery::find_paths_through_candidate_region(
CandidateRegion& candidate_region)
CandidateRegion& candidate_region, const fs::path &denovo_output_directory)
{
const auto read_covg { candidate_region.pileup.size() };
const auto length_of_candidate_sequence {
candidate_region.max_likelihood_sequence.length()
};
const double expected_kmer_covg { calculate_kmer_coverage(
read_covg, length_of_candidate_sequence) };
const fs::path GATB_graph_filepath (denovo_output_directory / "GATB_graph");

BOOST_LOG_TRIVIAL(debug) << "Running local assembly for: "
<< candidate_region.get_name() << " - interval ["
Expand All @@ -32,10 +33,11 @@ void DenovoDiscovery::find_paths_through_candidate_region(
LocalAssemblyGraph graph;

try {
const std::string GATB_graph_filepath_as_string = GATB_graph_filepath.string();
Graph gatb_graph
= LocalAssemblyGraph::create(new BankStrings(candidate_region.pileup),
"-kmer-size %d -abundance-min %d -verbose 0 -nb-cores 1", kmer_size,
min_covg_for_node_in_assembly_graph);
"-kmer-size %d -abundance-min %d -verbose 0 -nb-cores 1 -out %s", kmer_size,
min_covg_for_node_in_assembly_graph, GATB_graph_filepath_as_string.c_str());
if (clean_assembly_graph) {
clean(gatb_graph);
}
Expand All @@ -44,7 +46,7 @@ void DenovoDiscovery::find_paths_through_candidate_region(
} catch (gatb::core::system::Exception& error) {
BOOST_LOG_TRIVIAL(debug) << "Couldn't create GATB graph."
<< "\n\tEXCEPTION: " << error.getMessage();
remove_graph_file();
remove_graph_file(GATB_graph_filepath);
return;
}

Expand Down Expand Up @@ -76,7 +78,7 @@ void DenovoDiscovery::find_paths_through_candidate_region(
start_node, end_node, max_path_length, expected_kmer_covg);

if (abandoned) {
remove_graph_file();
remove_graph_file(GATB_graph_filepath);
return;
}

Expand Down Expand Up @@ -105,7 +107,7 @@ void DenovoDiscovery::find_paths_through_candidate_region(
.append(candidate_region.right_flanking_sequence);
}

remove_graph_file();
remove_graph_file(GATB_graph_filepath);
return;
}
}
Expand All @@ -114,7 +116,7 @@ void DenovoDiscovery::find_paths_through_candidate_region(
BOOST_LOG_TRIVIAL(debug) << "Could not find any combination of start and end "
"k-mers. Skipping local assembly for "
<< candidate_region.get_name();
remove_graph_file();
remove_graph_file(GATB_graph_filepath);
}

DenovoDiscovery::DenovoDiscovery(const uint_least8_t& kmer_size,
Expand Down
6 changes: 3 additions & 3 deletions src/denovo_discovery/local_assembly.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -245,10 +245,10 @@ void LocalAssemblyGraph::build_paths_between(const std::string& start_kmer,
}
}

void remove_graph_file()
void remove_graph_file (const fs::path &prefix)
{
const fs::path p { "dummy.h5" };
fs::remove(p);
const fs::path h5_file(prefix.string() + ".h5");
fs::remove(h5_file);
}

void clean(Graph& graph, const uint16_t& num_cores)
Expand Down
2 changes: 1 addition & 1 deletion src/map_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -487,7 +487,7 @@ int pandora_map(int argc, char *argv[]) {

for (auto &element : candidate_regions) {
auto &candidate_region {element.second};
denovo.find_paths_through_candidate_region(candidate_region); //TODO: this is hard to parallelize due to GATB's temp files
denovo.find_paths_through_candidate_region(candidate_region, denovo_output_directory); //TODO: this is hard to parallelize due to GATB's temp files
candidate_region.write_denovo_paths_to_file(denovo_output_directory);
}
}
Expand Down
22 changes: 11 additions & 11 deletions test/denovo_discovery/denovo_discovery_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ TEST(FindPathsThroughCandidateRegionTest, emptyPileupReturnsEmpty)
CandidateRegion candidate_region { Interval(0, 1), "test" };
candidate_region.max_likelihood_sequence = "ATGCGCTGAGAGTCGGACT";

denovo.find_paths_through_candidate_region(candidate_region);
denovo.find_paths_through_candidate_region(candidate_region, ".");

const DenovoPaths expected;
const auto& actual { candidate_region.denovo_paths };
Expand All @@ -126,7 +126,7 @@ TEST(FindPathsThroughCandidateRegionTest, kmerSizeBiggerThanCandidateReturnsEmpt
candidate_region.max_likelihood_sequence = "ATGCGCTGAGAGTCGGACT";
candidate_region.pileup = { "FOO", "BAR" };

denovo.find_paths_through_candidate_region(candidate_region);
denovo.find_paths_through_candidate_region(candidate_region, ".");

const DenovoPaths expected;
const auto& actual { candidate_region.denovo_paths };
Expand All @@ -144,7 +144,7 @@ TEST(FindPathsThroughCandidateRegionTest,
candidate_region.max_likelihood_sequence = "ATGCGCTGAGAGTCGGACT";
candidate_region.pileup = { "FOO", "BAR" };

denovo.find_paths_through_candidate_region(candidate_region);
denovo.find_paths_through_candidate_region(candidate_region, ".");

const DenovoPaths expected;
const auto& actual { candidate_region.denovo_paths };
Expand All @@ -161,7 +161,7 @@ TEST(FindPathsThroughCandidateRegionTest, startKmersDontExistInGraphReturnEmpty)
candidate_region.max_likelihood_sequence = "GGGGGGGGGGAGTCGGACT";
candidate_region.pileup = { "ATGCGCTGAGAGTCGGACT", "ATGCGCTGAGAGTCGGACT" };

denovo.find_paths_through_candidate_region(candidate_region);
denovo.find_paths_through_candidate_region(candidate_region, ".");

const DenovoPaths expected;
const auto& actual { candidate_region.denovo_paths };
Expand All @@ -178,7 +178,7 @@ TEST(FindPathsThroughCandidateRegionTest, endKmersDontExistInGraphReturnEmpty)
candidate_region.max_likelihood_sequence = "ATGCGCTGAGCCCCCCCCC";
candidate_region.pileup = { "ATGCGCTGAGAGTCGGACT", "ATGCGCTGAGAGTCGGACT" };

denovo.find_paths_through_candidate_region(candidate_region);
denovo.find_paths_through_candidate_region(candidate_region, ".");

const DenovoPaths expected;
const auto& actual { candidate_region.denovo_paths };
Expand All @@ -196,7 +196,7 @@ TEST(FindPathsThroughCandidateRegionTest, endKmerExistsInStartKmersFindPathAndCy
candidate_region.max_likelihood_sequence = "ATGCGCTGAGATGCGCTGA";
candidate_region.pileup = { "ATGCGCTGACATGCGCTGA", "ATGCGCTGACATGCGCTGA" };

denovo.find_paths_through_candidate_region(candidate_region);
denovo.find_paths_through_candidate_region(candidate_region, ".");

const DenovoPaths expected { "ATGCGCTGACATGCGCTGA", "ATGCGCTGACATGCGCTGACATGCGCTGA",
"ATGCGCTGACATGCGCTGACATGCGCTGACATGCGCTGA",
Expand All @@ -220,7 +220,7 @@ TEST(FindPathsThroughCandidateRegionTest,
candidate_region.pileup
= { "ATGCGCTGAGAGTCGGACT", "ATGCGCTGAGAGTCGGACT", "AAATAAA", "GCGGCGCGGCC" };

denovo.find_paths_through_candidate_region(candidate_region);
denovo.find_paths_through_candidate_region(candidate_region, ".");

const DenovoPaths expected { "ATGCGCTGAGAGTCGGACT" };
const auto& actual { candidate_region.denovo_paths };
Expand All @@ -237,7 +237,7 @@ TEST(FindPathsThroughCandidateRegionTest, twoIdenticalReadsReturnOnePath)
candidate_region.max_likelihood_sequence = "ATGCGCTGAGAGTCGGACT";
candidate_region.pileup = { "ATGCGCTGAGAGTCGGACT", "ATGCGCTGAGAGTCGGACT" };

denovo.find_paths_through_candidate_region(candidate_region);
denovo.find_paths_through_candidate_region(candidate_region, ".");

const DenovoPaths expected { "ATGCGCTGAGAGTCGGACT" };
const auto& actual { candidate_region.denovo_paths };
Expand All @@ -255,7 +255,7 @@ TEST(
candidate_region.max_likelihood_sequence = "ATGCGCTGAGAGTCGGACT";
candidate_region.pileup = { "ATGCGCTGAGAGTCGGACT", "ATGCGCTGATAGTCGGACT" };

denovo.find_paths_through_candidate_region(candidate_region);
denovo.find_paths_through_candidate_region(candidate_region, ".");

const DenovoPaths expected;
const auto& actual { candidate_region.denovo_paths };
Expand All @@ -274,7 +274,7 @@ TEST(FindPathsThroughCandidateRegionTest,
candidate_region.pileup
= { "ATGCGCTGAGAGTCGGACT", "ATGCGCTGAGAGTCGGACT", "ATGCGCTGATAGTCGGACT" };

denovo.find_paths_through_candidate_region(candidate_region);
denovo.find_paths_through_candidate_region(candidate_region, ".");

const DenovoPaths expected { "ATGCGCTGAGAGTCGGACT" };
const auto& actual { candidate_region.denovo_paths };
Expand All @@ -292,7 +292,7 @@ TEST(FindPathsThroughCandidateRegionTest, twoPossiblePathsWithGoodCovgReturnsTwo
candidate_region.pileup = { "ATGCGCTGAGAGTCGGACT", "ATGCGCTGAGAGTCGGACT",
"ATGCGCTGATAGTCGGACT", "ATGCGCTGATAGTCGGACT" };

denovo.find_paths_through_candidate_region(candidate_region);
denovo.find_paths_through_candidate_region(candidate_region, ".");
std::sort(
candidate_region.denovo_paths.begin(), candidate_region.denovo_paths.end());

Expand Down
Loading

0 comments on commit 951717a

Please sign in to comment.