diff --git a/cpp/tests/utilities/base_fixture.hpp b/cpp/tests/utilities/base_fixture.hpp
index 25011c0c97a..dade2af57c0 100644
--- a/cpp/tests/utilities/base_fixture.hpp
+++ b/cpp/tests/utilities/base_fixture.hpp
@@ -77,10 +77,16 @@ inline auto make_pool(bool use_max = false)
   // effect the maximum amount of parallel tests, and therefore `tests/CMakeLists.txt`
   // `_CUGRAPH_TEST_PERCENT` default value will need to be audited.
   auto const [free, total] = rmm::available_device_memory();
-  auto const min_alloc =
-    use_max ? rmm::align_down(std::min(free, total / 2), rmm::CUDA_ALLOCATION_ALIGNMENT)
+  // EOS: 1 node 0.94 succeeded 0.95 failed, 2+ nodes 0.97 succeeded 0.98 failed
+  auto const init_alloc =
+    use_max ? rmm::align_down(std::min(free, static_cast<size_t>(total * 0.93)), rmm::CUDA_ALLOCATION_ALIGNMENT)
             : rmm::align_down(std::min(free, total / 10), rmm::CUDA_ALLOCATION_ALIGNMENT);
-  return rmm::mr::make_owning_wrapper<rmm::mr::pool_memory_resource>(make_cuda(), min_alloc);
+  std::optional<size_t> max_alloc{};
+  if (use_max) {
+    max_alloc = init_alloc;
+  }
+  std::cout << "init_alloc ratio=" << static_cast<double>(init_alloc) / static_cast<double>(total) << std::endl;
+  return rmm::mr::make_owning_wrapper<rmm::mr::pool_memory_resource>(make_cuda(), init_alloc, max_alloc);
 }
 
 inline auto make_binning()
diff --git a/cpp/tests/utilities/mg_utilities.cpp b/cpp/tests/utilities/mg_utilities.cpp
index 6f8fb8c6acd..d22a9956f17 100644
--- a/cpp/tests/utilities/mg_utilities.cpp
+++ b/cpp/tests/utilities/mg_utilities.cpp
@@ -60,7 +60,7 @@ std::unique_ptr<raft::handle_t> initialize_mg_handle(size_t pool_size)
     --gpu_row_comm_size;
   }
 
-  cugraph::partition_manager::init_subcomm(*handle, gpu_row_comm_size);
+  cugraph::partition_manager::init_subcomm(*handle, std::max(comm_size / 8, 1));
 
   return std::move(handle);
 }
diff --git a/cpp/tests/utilities/test_graphs.hpp b/cpp/tests/utilities/test_graphs.hpp
index b7a91985658..5edc722a8c6 100644
--- a/cpp/tests/utilities/test_graphs.hpp
+++ b/cpp/tests/utilities/test_graphs.hpp
@@ -246,7 +246,7 @@ class Rmat_Usecase : public detail::TranslateGraph_Usecase {
     // cuMemAddressReserve
     // (https://developer.nvidia.com/blog/introducing-low-level-gpu-virtual-memory-management), we
     // can reduce the temporary memory requirement to (1 / num_partitions) * (original data size)
-    size_t constexpr num_partitions_per_gpu = 4;
+    size_t constexpr num_partitions_per_gpu = 8;
     size_t num_partitions =
       num_partitions_per_gpu * static_cast<size_t>(multi_gpu ? handle.get_comms().get_size() : 1);
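
For reference, the `use_max` branch changed above amounts to building an owning pool resource whose initial size is roughly 93% of total device memory (bounded by what is currently free) and whose maximum size equals that initial size. Below is a minimal, self-contained sketch of that pattern; it assumes rmm's `pool_memory_resource`/`make_owning_wrapper` interface in which the pool takes an initial size and an optional maximum size, and the helper name `make_capped_pool` and the exact header paths are illustrative, not taken from this PR.

```cpp
#include <rmm/aligned.hpp>
#include <rmm/cuda_device.hpp>
#include <rmm/mr/device/cuda_memory_resource.hpp>
#include <rmm/mr/device/owning_wrapper.hpp>
#include <rmm/mr/device/pool_memory_resource.hpp>

#include <algorithm>
#include <cstddef>
#include <memory>
#include <optional>

// Hypothetical helper: build a pool whose initial size is ~93% of total device
// memory (capped by the currently free amount) and whose growth is also capped
// at that initial size, mirroring the use_max path in make_pool().
inline auto make_capped_pool()
{
  auto cuda_mr             = std::make_shared<rmm::mr::cuda_memory_resource>();
  auto const [free, total] = rmm::available_device_memory();
  auto const init_alloc    = rmm::align_down(
    std::min(free, static_cast<std::size_t>(total * 0.93)), rmm::CUDA_ALLOCATION_ALIGNMENT);
  std::optional<std::size_t> max_alloc{init_alloc};  // forbid growth beyond the initial pool
  return rmm::mr::make_owning_wrapper<rmm::mr::pool_memory_resource>(cuda_mr, init_alloc, max_alloc);
}
```

Capping the maximum at the initial size should make an over-subscription show up as a pool allocation failure rather than as further growth attempts against the upstream CUDA resource, which is presumably why the diff probes fixed ratios (0.93/0.94/0.97) per node count in the `EOS` comment.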