From cd99393006b1ee22d82ebb6b73bae7a36556997d Mon Sep 17 00:00:00 2001
From: Adam Novak
Date: Fri, 17 May 2024 10:58:30 -0700
Subject: [PATCH] Make overlay construction less parallel and lookup hopefully faster

---
 bdsg/include/bdsg/overlays/packed_path_position_overlay.hpp  | 2 +-
 bdsg/include/bdsg/overlays/packed_reference_path_overlay.hpp | 2 +-
 bdsg/src/packed_path_position_overlay.cpp                    | 5 ++++-
 3 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/bdsg/include/bdsg/overlays/packed_path_position_overlay.hpp b/bdsg/include/bdsg/overlays/packed_path_position_overlay.hpp
index 55a7ade8..275f1c45 100644
--- a/bdsg/include/bdsg/overlays/packed_path_position_overlay.hpp
+++ b/bdsg/include/bdsg/overlays/packed_path_position_overlay.hpp
@@ -34,7 +34,7 @@ class PackedPositionOverlay : public PathPositionHandleGraph, public ExpandingOv
     /// Make a new PackedPositionOverlay, on the given graph. Glom short paths
     /// together to make internal indexes each over at least the given number
     /// of steps.
-    PackedPositionOverlay(const PathHandleGraph* graph, size_t steps_per_index = 1000000);
+    PackedPositionOverlay(const PathHandleGraph* graph, size_t steps_per_index = 20000000);
     PackedPositionOverlay() = default;
     PackedPositionOverlay(const PackedPositionOverlay& other) = default;
     PackedPositionOverlay(PackedPositionOverlay&& other) = default;
diff --git a/bdsg/include/bdsg/overlays/packed_reference_path_overlay.hpp b/bdsg/include/bdsg/overlays/packed_reference_path_overlay.hpp
index 39f2dbaa..eb41d359 100644
--- a/bdsg/include/bdsg/overlays/packed_reference_path_overlay.hpp
+++ b/bdsg/include/bdsg/overlays/packed_reference_path_overlay.hpp
@@ -29,7 +29,7 @@ class PackedReferencePathOverlay : public PackedPositionOverlay {
 
     /// Make a PackedReferencePathOverlay. Do the indexing and compute the
     /// additional indexes that the base class doesn't have.
-    PackedReferencePathOverlay(const PathHandleGraph* graph, size_t steps_per_index = 1000000);
+    PackedReferencePathOverlay(const PathHandleGraph* graph, size_t steps_per_index = 20000000);
 
     // We assume that tracing out a path is fast in the backing graph, but
     // finding visits on nodes is slow. We override the reverse lookups to go
diff --git a/bdsg/src/packed_path_position_overlay.cpp b/bdsg/src/packed_path_position_overlay.cpp
index 168d7c6c..1ecdf308 100644
--- a/bdsg/src/packed_path_position_overlay.cpp
+++ b/bdsg/src/packed_path_position_overlay.cpp
@@ -252,6 +252,7 @@ void PackedPositionOverlay::index_path_positions() {
 
     // And this will be the cumulative path length of all the paths in each collection.
     std::vector path_set_steps;
+    size_t total_length = 0;
     size_t accumulated_length = 0;
     for (size_t i = 0; i < path_handles.size(); i++) {
         if (accumulated_length >= steps_per_index) {
@@ -264,6 +265,7 @@ void PackedPositionOverlay::index_path_positions() {
 #endif
 
             path_set_steps.push_back(accumulated_length);
+            total_length += accumulated_length;
             accumulated_length = 0;
         }
         // Remember that this path's steps went into this index.
@@ -271,13 +273,14 @@ void PackedPositionOverlay::index_path_positions() {
     }
     bounds.push_back(path_handles.size());
     path_set_steps.push_back(accumulated_length);
+    total_length += accumulated_length;
 
     // Now we know how many indexes we need
     this->set_index_count(path_set_steps.size());
 
 #ifdef debug
 #pragma omp critical (cerr)
-    std::cerr << "Using " << indexes.size() << " indexes" << std::endl;
+    std::cerr << "Using " << indexes.size() << " indexes for " << total_length << " total steps" << std::endl;
 #endif
 
 #pragma omp parallel for