From 6211f0d950aa69b4ad7cca95f865d246fe7f227e Mon Sep 17 00:00:00 2001
From: Steffen Hirschmann
Date: Mon, 25 Jan 2021 10:18:23 +0100
Subject: [PATCH] Refactor local partitioning routines

---
 kdpart.h           | 271 +++++++++++++++++----------------------------
 kdpart_test_par.cc |  71 ++----------
 2 files changed, 111 insertions(+), 231 deletions(-)

diff --git a/kdpart.h b/kdpart.h
index 5d28095..fec03a9 100644
--- a/kdpart.h
+++ b/kdpart.h
@@ -293,6 +293,18 @@ struct PartTreeStorage {
         walk(std::forward<Pred>(p), std::forward<Func>(f), 0);
     }
 
+
+    /** Depth-first traversal of the whole tree starting at the root,
+     * calling function f on each node.
+     * @param f bool function with a single parameter of type
+     *          const_node_access_type. The return value determines whether
+     *          "walk_post" descends further into the subtree.
+     */
+    template <typename Func>
+    void walk_post(Func &&f) {
+        walk_post(std::forward<Func>(f), 0);
+    }
+
     /** Invalidates the information in the subtree rooted at "root".
      * Used for local repartitioning.
      */
@@ -366,6 +378,22 @@ struct PartTreeStorage {
         }
     }
 
+
+    /** Depth-first traversal of the subtree of node i (including i itself),
+     * calling function f on each node.
+     * @param f bool function with a single parameter of type
+     *          const_node_access_type. The return value determines whether
+     *          "walk_post" descends further into the subtree.
+     */
+    template <typename Func>
+    void walk_post(Func f, int i) {
+        const bool b = f(node(i));
+        if (b && inner[i]) {
+            walk_post(f, 2 * i + 1);
+            walk_post(f, 2 * i + 2);
+        }
+    }
+
     /// Descend direction for find(Chooser)
     enum class Descend {
         Left, Right
@@ -826,20 +854,8 @@ struct mpi_helper>
     }
 };
 
-}
-
-/** Repartitions a kd tree inline by means of local communication of the weights
- * and local computation of the new splits.
- *
- * Repartitioning is done between 2 or 3 processes that share the same
- * level 1 or level 2 subtree.
- *
- * @returns Parameter "s"
- *
- * @see repart_parttree_par
- */
-template
-PartTreeStorage& repart_parttree_par_local(PartTreeStorage& s, MPI_Comm comm, const std::vector& cellweights)
+template
+PartTreeStorage& _repart_parttree_par_local_impl(PartTreeStorage& s, MPI_Comm comm, const std::vector& cellweights, RepartRootFunc &&get_subtree_repartition_root, IsRepartRootPred &&is_repartition_subtree_root)
 {
     int size;
     MPI_Comm_size(comm, &size);
@@ -854,35 +870,35 @@ PartTreeStorage& repart_parttree_par_local(PartTreeStorage& s, MPI_Comm comm, co
     int rank;
     MPI_Comm_rank(comm, &rank);
     auto my_leaf = s.node_of_rank(rank);
-    auto my_limb_root = my_leaf.find_limbend_root();
+    auto my_subtree_root = get_subtree_repartition_root(my_leaf);
 
     /* Get all ranks that have subdomains participating in this limb end */
-    std::vector<int> limb_neighbors;
-    limb_neighbors.reserve(3);
-    for (int r = my_limb_root.pstart(); r < my_limb_root.pend() + 1; ++r) {
-        limb_neighbors.push_back(r);
+    std::vector<int> subtree_neighbors;
+    subtree_neighbors.reserve(3);
+    for (int r = my_subtree_root.pstart(); r < my_subtree_root.pend() + 1; ++r) {
+        subtree_neighbors.push_back(r);
     }
 
-    /* Distribute weights to "limb_neighbors" */
+    /* Distribute weights to "subtree_neighbors" */
 
     // A group of processes participating in a limb is identified by the smallest
    // rank in this limb end.
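+    // Illustrative example with hypothetical ranks: if the repartitioning
+    // subtree spans comm ranks {4, 5, 6}, each of them passes the same color
+    // my_subtree_root.pstart() == 4 to MPI_Comm_split below, so exactly these
+    // three ranks end up in one "neighcomm" of size 3; the split key (their
+    // rank in "comm") orders them, giving neighcomm ranks 0, 1 and 2.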
-    const int limb_group = *std::min_element(limb_neighbors.begin(), limb_neighbors.end());
+    const int subtree_group_id = my_subtree_root.pstart();
     MPI_Comm neighcomm;
-    MPI_Comm_split(comm, limb_group, rank, &neighcomm);
+    MPI_Comm_split(comm, subtree_group_id, rank, &neighcomm);
     util::GlobalVector neighbor_load(neighcomm, cellweights);
 
     // Setup a mapping from ranks relative to "comm" to ranks relative to "neighcomm"
     // Since we chose the rank relative to "comm" as key in MPI_Comm_split,
     // we could also do this manually:
-    // rank_to_neighrank[min(limb_neighbors)] = 0
-    // rank_to_neighrank[max(limb_neighbors)] = limb_neighbors.size() - 1;
+    // rank_to_neighrank[min(subtree_neighbors)] = 0
+    // rank_to_neighrank[max(subtree_neighbors)] = subtree_neighbors.size() - 1;
     // And if it is a limbend of size 3 assign rank "1" to the middle element.
     // But this is less error prone if we ever choose to change the Comm_split.
-    const std::map<int, int> rank_to_neighrank = map_ranks_to_comm(comm, limb_neighbors, neighcomm);
+    const std::map<int, int> rank_to_neighrank = map_ranks_to_comm(comm, subtree_neighbors, neighcomm);
 
     using cell_type = std::array<int, 3>;
-    auto neighbor_load_func = [&old_part, &neighbor_load, &rank_to_neighrank, &limb_neighbors](const cell_type& c){
+    auto neighbor_load_func = [&old_part, &neighbor_load, &rank_to_neighrank, &subtree_neighbors](const cell_type& c){
         auto n = old_part.node_of_cell(c);
 
         // Transform c to process ("rank") local coordinates
@@ -895,7 +911,7 @@ PartTreeStorage& repart_parttree_par_local(PartTreeStorage& s, MPI_Comm comm, co
         const auto i = LinearizeFn(loc_c, loc_box);
 
         // Map rank (relative to "comm") to "neighcomm".
-        assert(std::find(limb_neighbors.begin(), limb_neighbors.end(), n.rank()) != limb_neighbors.end());
+        assert(std::find(subtree_neighbors.begin(), subtree_neighbors.end(), n.rank()) != subtree_neighbors.end());
         const auto rank = rank_to_neighrank.at(n.rank());
 
         assert(neighbor_load.size(rank) > i);
@@ -908,146 +924,13 @@ PartTreeStorage& repart_parttree_par_local(PartTreeStorage& s, MPI_Comm comm, co
         return quality_splitting(codimload(split_dir, lu, ro), nproc, nproc_left);
     };
 
+    const bool is_responsible_process = rank == subtree_group_id;
     /* Re-partition limb ends
     * The process with smallest rank number is responsible for re-creating
     * this limb
     */
-    if (rank == my_limb_root.pstart()) {
-        auto nproc1 = my_limb_root.child1().nproc();
-        auto nproc2 = my_limb_root.child2().nproc();
-
-        // Passing "nproc1" as last parameter ensures that "nproc1" and
-        // "nproc2" do not change.
-        impl::split_node(my_limb_root, splitfunc, nproc1);
-        // Split_node avoids setting "inner", so do it manually
-        // This is also the reason why we use ".nproc()" to distinguish between
-        // nodes and not ".is_inner()"!
-        auto child1 = my_limb_root.child1(), child2 = my_limb_root.child2();
-        child1.inner() = 0;
-        child2.inner() = 0;
-
-        assert(nproc1 == child1.nproc());
-        assert(nproc2 == child2.nproc());
-
-        // In case of a 3-leaf limb split a child
-        if (child1.nproc() > 1) {
-            impl::split_node(child1, splitfunc);
-            assert(child2.nproc() == 1);
-        } else if (child2.nproc() > 1) {
-            impl::split_node(child2, splitfunc);
-            assert(child1.nproc() == 1);
-        } else {
-            assert(limb_neighbors.size() == 2);
-        }
-    }
-
-    /* Invalidate all limb ends
-     * Limb ends are invalidated on all processes not participating as well as on
-     * all participating processes BUT the one that performed the new split
-     */
-    s.walk([my_limb_root, &s, rank](auto node){
-        if (node.is_limbend() && node != my_limb_root && rank != my_limb_root.pstart()) {
-            s.invalidate_subtree(node);
-            // Stop "walk" from descending into nothing
-            node.inner() = 0;
-        }
-    });
-
-    /* Re-distribute all changes
-     * Use an Allreduce operation with MPI_MAX as operation.
-     * This works because we set all fields of all limb ends to "0" above.
-     */
-    std::vector<MPI_Request> reqs{};
-    reqs.reserve(8);
-    s.apply_to_data_vectors([comm, &reqs](auto &vec){
-        using value_type = typename std::remove_reference<decltype(vec)>::type::value_type;
-        reqs.push_back(mpi_helper<value_type>::iallreduce(vec, MPI_MAX, comm));
-    });
-    MPI_Waitall(reqs.size(), reqs.data(), MPI_STATUSES_IGNORE);
-
-    MPI_Comm_free(&neighcomm);
-    return s;
-}
-
-template
-PartTreeStorage& repart_parttree_par_local_top(PartTreeStorage& s, MPI_Comm comm, const std::vector& cellweights, int depth)
-{
-    int size;
-    MPI_Comm_size(comm, &size);
-
-    // Nothing to do.
-    if (size == 1)
-        return s;
-
-    PartTreeStorage old_part = s; // Tree corresponding to "cellweights".
-
-    /* Find own limb end */
-    int rank;
-    MPI_Comm_rank(comm, &rank);
-    auto my_leaf = s.node_of_rank(rank);
-    auto my_limb_root = my_leaf.find_root_of_subtree(depth);
-
-    /* Get all ranks that have subdomains participating in this limb end */
-    std::vector<int> limb_neighbors;
-    limb_neighbors.reserve(3);
-    for (int r = my_limb_root.pstart(); r < my_limb_root.pend() + 1; ++r) {
-        limb_neighbors.push_back(r);
-    }
-
-    /* Distribute weights to "limb_neighbors" */
-
-    // A group of processes participating in a limb is identified by the smallest
-    // rank in this limb end.
-    const int limb_group = *std::min_element(limb_neighbors.begin(), limb_neighbors.end());
-    MPI_Comm neighcomm;
-    MPI_Comm_split(comm, limb_group, rank, &neighcomm);
-    util::GlobalVector neighbor_load(neighcomm, cellweights);
-
-    // Setup a mapping from ranks relative to "comm" to ranks relative to "neighcomm"
-    // Since we chose the rank relative to "comm" as key in MPI_Comm_split,
-    // we could also do this manually:
-    // rank_to_neighrank[min(limb_neighbors)] = 0
-    // rank_to_neighrank[max(limb_neighbors)] = limb_neighbors.size() - 1;
-    // And if it is a limbend of size 3 assign rank "1" to the middle element.
-    // But this is less error prone if we ever choose to change the Comm_split.
-    const std::map<int, int> rank_to_neighrank = map_ranks_to_comm(comm, limb_neighbors, neighcomm);
-
-    using cell_type = std::array<int, 3>;
-    auto neighbor_load_func = [&old_part, &neighbor_load, &rank_to_neighrank, &limb_neighbors](const cell_type& c){
-        auto n = old_part.node_of_cell(c);
-
-        // Transform c to process ("rank") local coordinates
-        cell_type loc_c, loc_box;
-        for (auto i = 0; i < 3; ++i) {
-            loc_c[i] = c[i] - n.lu()[i];
-            loc_box[i] = n.ro()[i] - n.lu()[i];
-        }
-
-        const auto i = LinearizeFn(loc_c, loc_box);
-
-        // Map rank (relative to "comm") to "neighcomm".
-        assert(std::find(limb_neighbors.begin(), limb_neighbors.end(), n.rank()) != limb_neighbors.end());
-        const auto rank = rank_to_neighrank.at(n.rank());
-
-        assert(neighbor_load.size(rank) > i);
-        return neighbor_load(rank, i);
-    };
-
-    auto codimload = util::CodimSum(neighbor_load_func);
-
-    auto splitfunc = [&codimload](int split_dir, std::array<int, 3> lu, std::array<int, 3> ro, int nproc, int nproc_left) {
-        return quality_splitting(codimload(split_dir, lu, ro), nproc, nproc_left);
-    };
     int new_max_depth = 0;
-    /* Re-partition limb ends
-     * The process with smallest rank number is responsible for re-creating
-     * this limb
-     */
-    if (rank == my_limb_root.pstart() && limb_neighbors.size() > 1) {
-        std::cout << "pre re-split:" << std::endl;
-        std::cout << "child1.nproc " << my_limb_root.child1().nproc() << std::endl;
-        std::cout << "child2.nproc " << my_limb_root.child2().nproc() << std::endl;
+    if (is_responsible_process && subtree_neighbors.size() > 1) {
         s.walk_subtree([&s, &splitfunc, &new_max_depth](auto node) {
             // Need to split the node further?
             if (node.nproc() > 1) {
@@ -1059,23 +942,26 @@ PartTreeStorage& repart_parttree_par_local_top(PartTreeStorage& s, MPI_Comm comm
             } else {
                 new_max_depth = std::max(new_max_depth, node.depth());
             }
-        }, my_limb_root);
-        std::cout << "after re-split:" << std::endl;
-        std::cout << "child1.nproc " << my_limb_root.child1().nproc() << std::endl;
-        std::cout << "child2.nproc " << my_limb_root.child2().nproc() << std::endl;
+        }, my_subtree_root);
     }
 
     /* Invalidate all limb ends
     * Limb ends are invalidated on all processes not participating as well as on
     * all participating processes BUT the one that performed the new split
     */
-    s.walk([my_limb_root, &s, rank, depth](auto node){
-        if (node.depth() == depth) {
-            if (node != my_limb_root || rank != my_limb_root.pstart()) {
+    s.walk_post([my_subtree_root, &s, is_repartition_subtree_root, is_responsible_process](auto node){
+        if (is_repartition_subtree_root(node)) {
+            if (node != my_subtree_root || !is_responsible_process) {
                 s.invalidate_subtree(node);
                 // Stop "walk" from descending into nothing
                 node.inner() = 0;
             }
+            // Don't descend any further. For the limb end version of
+            // repartitioning, is_repartition_subtree_root can also evaluate
+            // to true for subtrees of "node".
+            return false;
+        } else {
+            return true;
         }
     });
 
@@ -1099,6 +985,53 @@ PartTreeStorage& repart_parttree_par_local_top(PartTreeStorage& s, MPI_Comm comm
 }
+}
+
+/** Repartitions a kd tree inline by means of local communication of the weights
+ * and local computation of the new splits.
+ *
+ * Repartitioning is done individually in each subtree rooted at the nodes
+ * of depth "depth" in the tree.
+ * In a regular binary tree 2^depth subgroups will be formed that repartition
+ * individually.
+ * This makes s.root().nproc() / (1 << depth) processes cooperate in each
+ * of these subgroups.
+ *
+ * @returns Parameter "s"
+ */
+template
+PartTreeStorage& repart_parttree_par_local_top(PartTreeStorage& s, MPI_Comm comm, const std::vector& cellweights, int depth)
+{
+    return _repart_parttree_par_local_impl(s, comm, cellweights, [depth](auto leaf){
+        return leaf.find_root_of_subtree(depth);
+    }, [depth](auto node) {
+        return node.depth() == depth;
+    });
+}
+
+
+/** Repartitions a kd tree inline by means of local communication of the weights
+ * and local computation of the new splits.
+ *
+ * Repartitioning is done between 2 or 3 processes that share the same
+ * level 1 or level 2 subtree.
+ *
+ * @returns Parameter "s"
+ *
+ * @see repart_parttree_par
+ */
+template
+PartTreeStorage& repart_parttree_par_local(PartTreeStorage& s, MPI_Comm comm, const std::vector& cellweights)
+{
+    return _repart_parttree_par_local_impl(s, comm, cellweights, [](auto leaf){
+        return leaf.find_limbend_root();
+    }, [](auto node){
+        return node.is_limbend();
+    });
+}
+
 /** Returns a tree with "size" subdomain.
  *
diff --git a/kdpart_test_par.cc b/kdpart_test_par.cc
index 8b8565c..ff8f033 100644
--- a/kdpart_test_par.cc
+++ b/kdpart_test_par.cc
@@ -243,65 +243,8 @@ void check_it()
     }
 }
 
-
-void check_it_local()
-{
-    std::random_device r;
-    std::default_random_engine rng(r());
-    //std::uniform_int_distribution<int> uniform_dist(50, 500);
-    std::uniform_int_distribution<int> uniform_dist(50, 100);
-
-    const int N = 100;
-    int size, rank;
-    MPI_Comm_size(MPI_COMM_WORLD, &size);
-    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-
-    if (rank == 0)
-        std::cout << "[local] Doing " << N << " checks with " << size << " procs." << std::endl;
-
-    for (int i = 0; i < N; ++i) {
-        std::array<int, 3> box = {{uniform_dist(rng), uniform_dist(rng), uniform_dist(rng)}};
-        // Make sure every node has the same box
-        MPI_Bcast(box.data(), 3, MPI_INT, 0, MPI_COMM_WORLD);
-
-        auto t = kdpart::initial_part_par(size, box);
-
-        if (rank == 0)
-            std::cout << "[local] REPART TEST " << i;
-
-        // Get own subdomain size
-        std::array<int, 3> lu, ro;
-        std::tie(lu, ro) = t.subdomain_bounds(rank);
-        int ncells = area(lu, ro);
-
-        // Provide weights for own cells and repartition (reuse int rng).
-        std::vector<double> weights(ncells);
-        std::generate(std::begin(weights), std::end(weights), [&uniform_dist, &rng, rank](){
-            return static_cast<double>(uniform_dist(rng) + 200 * (rank % 2)); // Every other process has more load
-        });
-        double imba_before = imbalance(t, t, size, box, weights);
-        auto old_tree = t;
-        kdpart::repart_parttree_par_local(t, MPI_COMM_WORLD, weights);
-        double imba_after = imbalance(old_tree, t, size, box, weights);
-
-        // Check repartitioned tree
-        all_valid_subdomains_check(t, box);
-        all_cells_assigned_check(t, box, size);
-        all_procs_assigned_check(t, size, box);
-        all_procs_have_cells(t, size, box, false); // not partitioned equally w.r.t. no. of cells, so no eq. dist check.
-        all_procs_disjoint(t, size, box);
-        mashall_test(t);
-        // Test for equal partitioning
-        CHECK(imba_after <= imba_before); // Conservative: Tests reduction only
-        CHECK(imba_after < 2.0);
-
-        if (rank == 0)
-            std::cout << " passed. (Imbalance (before/after): " << imba_before << " / " << imba_after << ")" << std::endl;
-        MPI_Barrier(MPI_COMM_WORLD);
-    }
-}
-
-void check_it_local_2()
+template <typename F>
+void check_it_local(F &&repart_fn)
 {
     std::random_device r;
     std::default_random_engine rng(r());
@@ -338,7 +281,7 @@ void check_it_local_2()
         });
         double imba_before = imbalance(t, t, size, box, weights);
         auto old_tree = t;
-        kdpart::repart_parttree_par_local_top(t, MPI_COMM_WORLD, weights, 0);
+        repart_fn(t, MPI_COMM_WORLD, weights);
         double imba_after = imbalance(old_tree, t, size, box, weights);
 
         // Check repartitioned tree
@@ -364,8 +307,12 @@ int main(int argc, char **argv)
     std::cout << "Checking global repart..." << std::endl;
     check_it();
     std::cout << "Checking local repart..." << std::endl;
-    check_it_local();
+    check_it_local([](auto &t, MPI_Comm comm, const auto &weights){
+        kdpart::repart_parttree_par_local(t, comm, weights);
+    });
     std::cout << "Checking local top repart..." << std::endl;
-    check_it_local_2();
+    check_it_local([](auto &t, MPI_Comm comm, const auto &weights){
+        kdpart::repart_parttree_par_local_top(t, comm, weights, 1);
+    });
     MPI_Finalize();
 }