Skip to content

Commit

Permalink
Remove preliminary output
Browse files Browse the repository at this point in the history
  • Loading branch information
eaasna committed Sep 12, 2023
1 parent 7d55299 commit e6e8ddc
Show file tree
Hide file tree
Showing 15 changed files with 37 additions and 406 deletions.
3 changes: 3 additions & 0 deletions include/utilities/consolidate/consolidate_matches.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#pragma once

#include <filesystem>

#include <valik/shared.hpp>
#include <valik/split/database_metadata.hpp>

Expand All @@ -12,6 +14,7 @@ namespace valik
* @brief Function that removes duplicates from split Stellar search results and writes the final output file.
*
* @param arguments Command line arguments.
* @return false if the intermediate results could not be cleaned up.
*/
void consolidate_matches(search_arguments const & arguments);

Expand Down
2 changes: 2 additions & 0 deletions src/consolidate/consolidate_matches.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ void consolidate_matches(search_arguments const & arguments)
auto ref_meta = database_metadata(arguments.ref_meta_path, false);
auto matches = read_stellar_output(arguments.all_matches, ref_meta);

seqan3::debug_stream << std::string(arguments.all_matches) << '\t' << matches.size() << '\n';

std::sort( matches.begin(), matches.end(), std::greater<stellar_match>());
matches.erase( std::unique( matches.begin(), matches.end() ), matches.end() );

Expand Down
6 changes: 6 additions & 0 deletions src/valik_search.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,13 @@ void valik_search(search_arguments const & arguments)

// Consolidate matches (not necessary when searching a metagenomic database)
if (!arguments.ref_meta_path.empty())
{
consolidate_matches(arguments);
const bool error_in_delete = !std::filesystem::remove(arguments.all_matches);
if (error_in_delete)
std::cerr << "Could not clean up intermediate file: \t" << std::string(arguments.all_matches) << '\n';
failed = failed || error_in_delete;
}

if (arguments.write_time)
write_time_statistics(time_statistics, arguments.out_file.string() + ".time");
Expand Down
14 changes: 7 additions & 7 deletions test/api/utilities/consolidate/consolidate_matches_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#include <utilities/consolidate/consolidate_matches.hpp>

// Generate the full path of a test input file that is provided in the data directory.
std::filesystem::path data(std::string const & filename)
std::filesystem::path data_path(std::string const & filename)
{
return std::filesystem::path{std::string{DATADIR}}.concat(filename);
}
Expand All @@ -15,7 +15,7 @@ std::filesystem::path consolidation_input_path(size_t const number_of_bins, size
name += "bins";
name += std::to_string(overlap);
name += "overlap_dream_all.gff";
return data(name);
return data_path(name);
}

std::filesystem::path consolidation_meta_path(size_t const number_of_bins, size_t const overlap) noexcept
Expand All @@ -25,7 +25,7 @@ std::filesystem::path consolidation_meta_path(size_t const number_of_bins, size_
name += "bins";
name += std::to_string(overlap);
name += "overlap_reference_metadata.tsv";
return data(name);
return data_path(name);
}

void compare_gff_out(std::vector<valik::stellar_match> const & expected,
Expand Down Expand Up @@ -62,8 +62,8 @@ TEST(consolidate_matches, bins8overlap50)
valik::consolidate_matches(arguments);

valik::database_metadata reference(arguments.ref_meta_path, false);
auto expected = valik::read_stellar_output(data("stellar_truth.gff"), reference, std::ios::binary);
auto actual = valik::read_stellar_output("consolidated.gff", reference);
auto expected = valik::read_stellar_output(data_path("stellar_truth.gff"), reference, std::ios::binary);
auto actual = valik::read_stellar_output("consolidated.gff", reference, std::ios::binary);

compare_gff_out(expected, actual);
}
Expand All @@ -81,8 +81,8 @@ TEST(consolidate_matches, bins16overlap50)
valik::consolidate_matches(arguments);

valik::database_metadata reference(arguments.ref_meta_path, false);
auto expected = valik::read_stellar_output(data("stellar_truth.gff"), reference, std::ios::binary);
auto actual = valik::read_stellar_output("consolidated.gff", reference);
auto expected = valik::read_stellar_output(data_path("stellar_truth.gff"), reference, std::ios::binary);
auto actual = valik::read_stellar_output("consolidated.gff", reference, std::ios::binary);

compare_gff_out(expected, actual);
}
20 changes: 10 additions & 10 deletions test/api/valik/split/write_seg_sequences_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#include <seqan3/test/expect_range_eq.hpp>

// Generate the full path of a test input file that is provided in the data directory.
static std::filesystem::path data(std::string const & filename)
static std::filesystem::path data_path(std::string const & filename)
{
return std::filesystem::path{std::string{DATADIR}}.concat(filename);
}
Expand Down Expand Up @@ -35,17 +35,17 @@ static void const test_reference_out(size_t overlap, size_t bins)
{
std::string path_prefix = "write_out_" + std::to_string(overlap) + "_" + std::to_string(bins);

valik::database_metadata reference(data(path_prefix + "_reference_metadata.txt"), false);
valik::database_segments segments(data(path_prefix + "_reference_segments.txt"));
valik::write_reference_segments(reference, segments, data("database.fasta"));
valik::database_metadata reference(data_path(path_prefix + "_reference_metadata.txt"), false);
valik::database_segments segments(data_path(path_prefix + "_reference_segments.txt"));
valik::write_reference_segments(reference, segments, data_path("database.fasta"));

for (size_t i = 0; i < bins - 1; i++)
{
valik::database_segments::segment current_seg = segments.members[i];
valik::database_segments::segment next_seg = segments.members[i + 1];

std::string current_seg_seq = string_from_file(data("database_" + std::to_string(i) + ".fasta"), std::ios::binary);
std::string next_seg_seq = string_from_file(data("database_" + std::to_string(i + 1) + ".fasta"), std::ios::binary);
std::string current_seg_seq = string_from_file(data_path("database_" + std::to_string(i) + ".fasta"), std::ios::binary);
std::string next_seg_seq = string_from_file(data_path("database_" + std::to_string(i + 1) + ".fasta"), std::ios::binary);
trim_fasta_id(current_seg_seq);
trim_fasta_id(next_seg_seq);

Expand Down Expand Up @@ -95,15 +95,15 @@ static void const test_query_out(size_t overlap, size_t bins)
{
std::string path_prefix = "write_out_" + std::to_string(overlap) + "_" + std::to_string(bins);

valik::database_metadata reference(data(path_prefix + "_reference_metadata.txt"), false);
valik::database_segments segments(data(path_prefix + "_reference_segments.txt"));
valik::write_query_segments(reference, segments, data("database.fasta"));
valik::database_metadata reference(data_path(path_prefix + "_reference_metadata.txt"), false);
valik::database_segments segments(data_path(path_prefix + "_reference_segments.txt"));
valik::write_query_segments(reference, segments, data_path("database.fasta"));

using sequence_file_t = seqan3::sequence_file_input<valik::dna4_traits, seqan3::fields<seqan3::field::seq>>;

size_t i{0};
seqan3::dna4_vector previous_seg_seq;
for (auto && [current_seg_seq] : sequence_file_t{data("database.segments.fasta")})
for (auto && [current_seg_seq] : sequence_file_t{data_path("database.segments.fasta")})
{
if (i > 1)
{
Expand Down
5 changes: 0 additions & 5 deletions test/cli/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -70,11 +70,6 @@ target_use_datasources (valik_test FILES 8bins23window1error100pattern1overlap.g
target_use_datasources (valik_test FILES 8bins23window1error100pattern40overlap.gff.out)
target_use_datasources (valik_test FILES query.fq)

target_use_datasources (valik_test FILES 16bins50overlap_dream_all.gff)
target_use_datasources (valik_test FILES 16bins50overlap_reference_metadata.tsv)
target_use_datasources (valik_test FILES 8bins50overlap_dream_all.gff)
target_use_datasources (valik_test FILES 8bins50overlap_reference_metadata.tsv)
target_use_datasources (valik_test FILES multi_seq_ref.fasta)
target_use_datasources (valik_test FILES query_e0.06.fasta)

add_cli_test (dream_test.cpp)
Expand Down
12 changes: 0 additions & 12 deletions test/data/datasources.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -209,9 +209,6 @@ declare_datasource (FILE 16bins13window1error.gff
declare_datasource (FILE 16bins13window1error.gff.out
URL ${CMAKE_SOURCE_DIR}/test/data/dream/16bins13window1error.gff.out
URL_HASH SHA256=89e61426004aa15bff39de90b4cd2d7e9080dc19cc3596a797f00d7c8f0c95c0)
declare_datasource (FILE 16bins13window1error.gff.preliminary
URL ${CMAKE_SOURCE_DIR}/test/data/dream/16bins13window1error.gff.preliminary
URL_HASH SHA256=1b554d97c35e6bc5cfbda12a8fb817a980da5aeeb8d0d62a827d4996a18ab245)
declare_datasource (FILE 16bins13window.ibf
URL ${CMAKE_SOURCE_DIR}/test/data/dream/16bins13window.ibf
URL_HASH SHA256=0875cd2d90d1320f93c575f3197e7879a5b8e51c5c11de76975561d30e4cf324)
Expand All @@ -221,9 +218,6 @@ declare_datasource (FILE 16bins15window1error.gff
declare_datasource (FILE 16bins15window1error.gff.out
URL ${CMAKE_SOURCE_DIR}/test/data/dream/16bins15window1error.gff.out
URL_HASH SHA256=66d219613d6a715bbf6cd282868ee31612147287471b7e1820ca77bc370dec9d)
declare_datasource (FILE 16bins15window1error.gff.preliminary
URL ${CMAKE_SOURCE_DIR}/test/data/dream/16bins15window1error.gff.preliminary
URL_HASH SHA256=d728395b3760ef0dc97e4ca37aa324b7ac4aa25601f6e62143a7b189e792b24b)
declare_datasource (FILE 16bins15window.ibf
URL ${CMAKE_SOURCE_DIR}/test/data/dream/16bins15window.ibf
URL_HASH SHA256=7f1ce2bbdf8d657da29d39879ab23c68cb19dbe0b58c69a9c5a576f6528ad24c)
Expand All @@ -233,9 +227,6 @@ declare_datasource (FILE 4bins13window1error.gff
declare_datasource (FILE 4bins13window1error.gff.out
URL ${CMAKE_SOURCE_DIR}/test/data/dream/4bins13window1error.gff.out
URL_HASH SHA256=5d285b543887e94829f15112b8d3c9659d48ff61a74cb216f2fa9f4bb54b4c90)
declare_datasource (FILE 4bins13window1error.gff.preliminary
URL ${CMAKE_SOURCE_DIR}/test/data/dream/4bins13window1error.gff.preliminary
URL_HASH SHA256=a095ac60b3077cbb3a34a97886b19bb2cfcc0936cdbc8938694d7c829a06547d)
declare_datasource (FILE 4bins13window.ibf
URL ${CMAKE_SOURCE_DIR}/test/data/dream/4bins13window.ibf
URL_HASH SHA256=b08ec3c196dc45faf73c24b86113e2c89adaf3d1844799d646e25dc0e77ac6bb)
Expand All @@ -245,9 +236,6 @@ declare_datasource (FILE 4bins15window1error.gff
declare_datasource (FILE 4bins15window1error.gff.out
URL ${CMAKE_SOURCE_DIR}/test/data/dream/4bins15window1error.gff.out
URL_HASH SHA256=d7c0691b24eb8c949eb522b9844d647b62b0a621953692b15cad5c022128c29e)
declare_datasource (FILE 4bins15window1error.gff.preliminary
URL ${CMAKE_SOURCE_DIR}/test/data/dream/4bins15window1error.gff.preliminary
URL_HASH SHA256=163e226fba9cca687ea233209d291bba4558cee6ef0a3d00e7b61631c712cd9e)
declare_datasource (FILE 4bins15window.ibf
URL ${CMAKE_SOURCE_DIR}/test/data/dream/4bins15window.ibf
URL_HASH SHA256=8b584d0e55043b3cc0835674dc83f5a7db6143645071f49973a1d085ac4fb919)
Expand Down
102 changes: 0 additions & 102 deletions test/data/dream/16bins13window1error.gff.preliminary

This file was deleted.

Loading

0 comments on commit e6e8ddc

Please sign in to comment.