From afc90b9ef2dbf4eba1dbf806f8de1e1a92097520 Mon Sep 17 00:00:00 2001 From: anagainaru Date: Fri, 13 Sep 2024 22:35:05 -0400 Subject: [PATCH 1/2] Adding testing for the Kokkos backend --- testing/adios2/engine/bp/CMakeLists.txt | 10 + .../engine/bp/TestBPWriteReadKokkos.cpp | 537 ++++++++++++++++++ 2 files changed, 547 insertions(+) create mode 100644 testing/adios2/engine/bp/TestBPWriteReadKokkos.cpp diff --git a/testing/adios2/engine/bp/CMakeLists.txt b/testing/adios2/engine/bp/CMakeLists.txt index 656c23ce78..7c2a6e1627 100644 --- a/testing/adios2/engine/bp/CMakeLists.txt +++ b/testing/adios2/engine/bp/CMakeLists.txt @@ -279,6 +279,16 @@ gtest_add_tests_helper(StepsInSituLocalArray MPI_ALLOW BP Engine.BP. .FileStream WORKING_DIRECTORY ${FS_DIR} EXTRA_ARGS "FileStream" ) +if(ADIOS2_HAVE_Kokkos) + gtest_add_tests_helper(WriteReadKokkos MPI_ALLOW BP Engine.BP. .BP5 + WORKING_DIRECTORY ${BP5_DIR} EXTRA_ARGS "BP5" + ) + + foreach(tgt ${Test.Engine.BP.WriteReadKokkos-TARGETS}) + target_link_libraries(${tgt} Kokkos::kokkos) + endforeach() +endif() + if(ADIOS2_HAVE_CUDA OR ADIOS2_HAVE_Kokkos_CUDA) gtest_add_tests_helper(WriteReadCuda MPI_ALLOW BP Engine.BP. .BP4 WORKING_DIRECTORY ${BP4_DIR} EXTRA_ARGS "BP4" diff --git a/testing/adios2/engine/bp/TestBPWriteReadKokkos.cpp b/testing/adios2/engine/bp/TestBPWriteReadKokkos.cpp new file mode 100644 index 0000000000..60a3af71ef --- /dev/null +++ b/testing/adios2/engine/bp/TestBPWriteReadKokkos.cpp @@ -0,0 +1,537 @@ +/* + * Distributed under the OSI-approved Apache License, Version 2.0. See + * accompanying file Copyright.txt for details. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include + +std::string engineName; + +const float EPSILON = std::numeric_limits::epsilon(); +const float INCREMENT = 10.0f; + +void KokkosDetectMemSpace(const std::string mode) +{ + const std::string fname("BPWRKokkosDetect" + mode + ".bp"); + adios2::Mode ioMode = adios2::Mode::Deferred; + if (mode == "Sync") + ioMode = adios2::Mode::Sync; + + const size_t Nx = 5; + const size_t NSteps = 2; + + adios2::ADIOS adios; + { // write + Kokkos::View cpuData("simBuffer", Nx); + Kokkos::parallel_for( + "initBuffer", Kokkos::RangePolicy(0, Nx), + KOKKOS_LAMBDA(int i) { cpuData(i) = static_cast(i); }); + Kokkos::fence(); + auto gpuData = Kokkos::create_mirror_view_and_copy( + Kokkos::DefaultExecutionSpace::memory_space{}, cpuData); + + adios2::IO io = adios.DeclareIO("TestIO"); + const adios2::Dims shape{Nx}; + const adios2::Dims start{0}; + const adios2::Dims count{Nx}; + auto var_r32 = io.DefineVariable("r32", shape, start, count); + auto var_gpur32 = io.DefineVariable("gpur32", shape, start, count); + EXPECT_TRUE(var_r32); + EXPECT_TRUE(var_gpur32); + + io.SetEngine("BP5"); + if (!engineName.empty()) + { + io.SetEngine(engineName); + } + adios2::Engine bpWriter = io.Open(fname, adios2::Mode::Write); + + for (size_t step = 0; step < NSteps; ++step) + { + // Update values in the simulation data + bpWriter.BeginStep(); + bpWriter.Put(var_gpur32, gpuData, ioMode); + bpWriter.Put(var_r32, cpuData, ioMode); + bpWriter.EndStep(); + Kokkos::parallel_for( + "updateCPUBuffer", Kokkos::RangePolicy(0, Nx), + KOKKOS_LAMBDA(int i) { cpuData(i) += INCREMENT; }); + Kokkos::parallel_for( + "updateGPUBuffer", Kokkos::RangePolicy(0, Nx), + KOKKOS_LAMBDA(int i) { gpuData(i) += INCREMENT; }); + Kokkos::fence(); + } + + bpWriter.Close(); + } + { // read + adios2::IO io = adios.DeclareIO("ReadIO"); + io.SetEngine("BP5"); + if (!engineName.empty()) + { + io.SetEngine(engineName); + } + + adios2::Engine bpReader = io.Open(fname, adios2::Mode::Read); + unsigned int t = 0; + for (; bpReader.BeginStep() == adios2::StepStatus::OK; ++t) + { + auto var_r32 = io.InquireVariable("r32"); + auto var_gpur32 = io.InquireVariable("gpur32"); + EXPECT_TRUE(var_r32); + EXPECT_TRUE(var_gpur32); + + std::vector r32o(Nx); + Kokkos::View gpuData("readBuffer", + Nx); + + bpReader.Get(var_r32, r32o.data(), ioMode); + bpReader.Get(var_gpur32, gpuData, ioMode); + bpReader.EndStep(); + auto cpuData = + Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace::memory_space{}, gpuData); + + for (size_t i = 0; i < Nx; i++) + { + char msg[1 << 8] = {0}; + snprintf(msg, sizeof(msg), "t=%d i=%zu cpu=%f gpu=%f", t, i, r32o[i], cpuData(i)); + ASSERT_LT(std::abs(r32o[i] - cpuData(i)), EPSILON) << msg; + } + } + EXPECT_EQ(t, NSteps); + + bpReader.Close(); + } +} + +void KokkosWriteReadMemorySelection() +{ + adios2::MemorySpace adiosMemSpace = adios2::MemorySpace::Host; +#ifdef ADIOS2_HAVE_GPU_SUPPORT + if (!std::is_same::value) + adiosMemSpace = adios2::MemorySpace::GPU; +#endif + + int mpiRank = 0, mpiSize = 1; +#if ADIOS2_USE_MPI + MPI_Comm_rank(MPI_COMM_WORLD, &mpiRank); + MPI_Comm_size(MPI_COMM_WORLD, &mpiSize); + const std::string fname("BPWRKokkosMemSel2D_MPI.bp"); +#else + const std::string fname("BPWRKokkosMemSel2D.bp"); +#endif + +#if ADIOS2_USE_MPI + adios2::ADIOS adios(MPI_COMM_WORLD); +#else + adios2::ADIOS adios; +#endif + const size_t Nx = 3, Ny = 2; + const size_t NSteps = 2; + const size_t ghostCells = 2; + const size_t totalNx = Nx + 2 * ghostCells, totalNy = Ny + 2 * ghostCells; + + Kokkos::View inputData("inBuffer", + totalNx, totalNy); + // initialize all data to 0 and update values in the active selection + Kokkos::parallel_for( + "zeroBuffer", Kokkos::MDRangePolicy>({0, 0}, {totalNx, totalNy}), + KOKKOS_LAMBDA(int x, int y) { inputData(x, y) = 0; }); + Kokkos::parallel_for( + "initBuffer", + Kokkos::MDRangePolicy>({ghostCells, ghostCells}, + {totalNx - ghostCells, totalNy - ghostCells}), + KOKKOS_LAMBDA(int x, int y) { inputData(x, y) = x * (mpiRank + 1); }); + Kokkos::fence(); + + { // write + + adios2::IO io = adios.DeclareIO("TestIO"); + io.SetEngine("BP5"); + if (!engineName.empty()) + { + io.SetEngine(engineName); + } + + const adios2::Dims shape{static_cast(Nx * mpiSize), Ny}; + const adios2::Dims start{static_cast(Nx * mpiRank), 0}; + const adios2::Dims count{Nx, Ny}; + auto var_r32 = io.DefineVariable("r32", shape, start, count); + + const adios2::Dims memoryStart = {ghostCells, ghostCells}; + const adios2::Dims memoryCount = {totalNx, totalNy}; + var_r32.SetMemorySelection({memoryStart, memoryCount}); + + adios2::Engine bpWriter = io.Open(fname, adios2::Mode::Write); + for (size_t step = 0; step < NSteps; ++step) + { + bpWriter.BeginStep(); + var_r32.SetMemorySpace(adiosMemSpace); + bpWriter.Put(var_r32, inputData); + bpWriter.EndStep(); + // Update values in the simulation data + Kokkos::parallel_for( + "updateBuffer", + Kokkos::MDRangePolicy>( + {ghostCells, ghostCells}, {totalNx - ghostCells, totalNy - ghostCells}), + KOKKOS_LAMBDA(int x, int y) { inputData(x, y) += INCREMENT; }); + Kokkos::fence(); + } + + bpWriter.Close(); + } + { + adios2::IO io = adios.DeclareIO("ReadIO"); + io.SetEngine("BP5"); + if (!engineName.empty()) + { + io.SetEngine(engineName); + } + + adios2::Engine bpReader = io.Open(fname, adios2::Mode::Read); + unsigned int t = 0; + for (; bpReader.BeginStep() == adios2::StepStatus::OK; ++t) + { + auto var_r32 = io.InquireVariable("r32"); + var_r32.SetSelection({{Nx * mpiRank, 0}, {Nx, Ny}}); + EXPECT_TRUE(var_r32); + var_r32.SetMemorySpace(adiosMemSpace); + EXPECT_EQ(var_r32.Min(), t * INCREMENT + ghostCells); + EXPECT_EQ(var_r32.Max(), t * INCREMENT + (Nx + ghostCells - 1) * mpiSize); + + Kokkos::View outputData( + "outBuffer", Nx, Ny); + bpReader.Get(var_r32, outputData); + bpReader.EndStep(); + + auto cpuData = + Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace::memory_space{}, outputData); + + for (size_t x = 0; x < Nx; x++) + for (size_t y = 0; y < Ny; y++) + { + char msg[1 << 8] = {0}; + snprintf(msg, sizeof(msg), "t=%d i=(%zu %zu) rank=%d r32o=%f r32i=%f", t, x, y, + mpiRank, cpuData(x, y), + INCREMENT * t + (x + ghostCells) * (mpiRank + 1)); + ASSERT_LT(std::abs(cpuData(x, y) - + (INCREMENT * t + (x + ghostCells) * (mpiRank + 1))), + EPSILON) + << msg; + } + } + EXPECT_EQ(t, NSteps); + bpReader.Close(); + } +} + +void KokkosWriteReadMPI2D() +{ + const size_t Nx = 5; + const size_t Ny = 2; + const size_t NSteps = 3; + + adios2::MemorySpace adiosMemSpace = adios2::MemorySpace::Host; +#ifdef ADIOS2_HAVE_GPU_SUPPORT + if (!std::is_same::value) + adiosMemSpace = adios2::MemorySpace::GPU; +#endif + + int mpiRank = 0, mpiSize = 1; +#if ADIOS2_USE_MPI + MPI_Comm_rank(MPI_COMM_WORLD, &mpiRank); + MPI_Comm_size(MPI_COMM_WORLD, &mpiSize); + const std::string fname("BPWRKokkos2D_MPI.bp"); +#else + const std::string fname("BPWRKokkos2D.bp"); +#endif + +#if ADIOS2_USE_MPI + adios2::ADIOS adios(MPI_COMM_WORLD); +#else + adios2::ADIOS adios; +#endif + + // Initialize the simulation data + Kokkos::View inputData("inBuffer", Nx, + Ny); + Kokkos::parallel_for( + "initBuffer", Kokkos::MDRangePolicy>({0, 0}, {Nx, Ny}), + KOKKOS_LAMBDA(int x, int y) { inputData(x, y) = x * (mpiRank + 1); }); + Kokkos::fence(); + + { // write + adios2::IO io = adios.DeclareIO("TestIO"); + io.SetEngine("BP5"); + if (!engineName.empty()) + { + io.SetEngine(engineName); + } + + const adios2::Dims shape{static_cast(Nx * mpiSize), Ny}; + const adios2::Dims start{static_cast(Nx * mpiRank), 0}; + const adios2::Dims count{Nx, Ny}; + + auto var_r32 = io.DefineVariable("r32", shape, start, count); + var_r32.SetMemorySpace(adiosMemSpace); + + adios2::Engine bpWriter = io.Open(fname, adios2::Mode::Write); + for (size_t step = 0; step < NSteps; ++step) + { + bpWriter.BeginStep(); + bpWriter.Put(var_r32, inputData); + bpWriter.EndStep(); + // Update values in the simulation data + Kokkos::parallel_for( + "updateBuffer", Kokkos::MDRangePolicy>({0, 0}, {Nx, Ny}), + KOKKOS_LAMBDA(int x, int y) { inputData(x, y) += INCREMENT; }); + Kokkos::fence(); + } + + bpWriter.Close(); + } + +#if ADIOS2_USE_MPI + MPI_Barrier(MPI_COMM_WORLD); +#endif + + { // read + adios2::IO io = adios.DeclareIO("ReadIO"); + io.SetEngine("BP5"); + + if (!engineName.empty()) + { + io.SetEngine(engineName); + } + + adios2::Engine bpReader = io.Open(fname, adios2::Mode::Read); + + unsigned int t = 0; + for (; bpReader.BeginStep() == adios2::StepStatus::OK; ++t) + { + auto var_r32 = io.InquireVariable("r32"); + EXPECT_TRUE(var_r32); + var_r32.SetMemorySpace(adiosMemSpace); + var_r32.SetSelection({{Nx * mpiRank, 0}, {Nx, Ny}}); + + EXPECT_EQ(var_r32.Min(), INCREMENT * t); + EXPECT_EQ(var_r32.Max(), INCREMENT * t + (Nx - 1) * mpiSize); + + Kokkos::View outputData( + "outBuffer", Nx, Ny); + bpReader.Get(var_r32, outputData); + bpReader.EndStep(); + + auto cpuData = + Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace::memory_space{}, outputData); + + for (size_t x = 0; x < Nx; x++) + for (size_t y = 0; y < Ny; y++) + { + char msg[1 << 8] = {0}; + snprintf(msg, sizeof(msg), "t=%d i=(%zu %zu) rank=%d r32o=%f r32i=%f", t, x, y, + mpiRank, cpuData(x, y), INCREMENT * t + x * (mpiRank + 1)); + ASSERT_LT(std::abs(cpuData(x, y) - (INCREMENT * t + x * (mpiRank + 1))), + EPSILON) + << msg; + } + } + EXPECT_EQ(t, NSteps); + + bpReader.Close(); + } +} + +bool compareSelection2D( + const Kokkos::View a, + const adios2::Dims &shape, const adios2::Dims &start, const adios2::Dims &count, + Kokkos::View b) +{ + std::cout << " compare Block: shape = " << adios2::ToString(shape) + << " start = " << adios2::ToString(start) << " count = " << adios2::ToString(count) + << std::endl; + size_t match = 0; + auto const start_0 = start[0]; + auto const start_1 = start[1]; + Kokkos::parallel_reduce( + "compareBuffers", Kokkos::MDRangePolicy>({0, 0}, {count[0], count[1]}), + KOKKOS_LAMBDA(int x, int y, size_t &lmatch) { + if (b(x, y) != a(start_0 + x, start_1 + y)) + { + lmatch++; + Kokkos::printf(" Non-match at pos = (%d %d) : input = %f : output = %f\n", x, y, + a(start_0 + x, start_1 + y), b(x, y)); + } + }, + match); + Kokkos::fence(); + return (match == 0); +} + +void KokkosWriteReadSelection2D() +{ + adios2::MemorySpace adiosMemSpace = adios2::MemorySpace::Host; +#ifdef ADIOS2_HAVE_GPU_SUPPORT + if (!std::is_same::value) + adiosMemSpace = adios2::MemorySpace::GPU; +#endif + + constexpr size_t C1 = 5; + constexpr size_t C2 = 4; + constexpr size_t DIM1 = 3 * C1; + constexpr size_t DIM2 = 3 * C2; + const std::string filename = "BPWRKokkosSel2D.bp"; + Kokkos::View inputData("inBuffer", DIM1, + DIM2); + Kokkos::parallel_for( + "initBuffer", Kokkos::MDRangePolicy>({0, 0}, {DIM1, DIM2}), + KOKKOS_LAMBDA(int x, int y) { inputData(x, y) = x * 1.0 + y / 100.0; }); + Kokkos::fence(); + + adios2::ADIOS adios; + { // write + adios2::IO ioWrite = adios.DeclareIO("TestIO"); + ioWrite.SetEngine("BP5"); + if (!engineName.empty()) + { + ioWrite.SetEngine(engineName); + } + adios2::Engine engine = ioWrite.Open(filename, adios2::Mode::Write); + const adios2::Dims shape = {DIM1, DIM2}; + const adios2::Dims count = {C1, C2}; + adios2::Dims start{0, 0}; + adios2::Variable var = + ioWrite.DefineVariable("selDouble", shape, start, count); + + engine.BeginStep(); + for (size_t i = 0; i < DIM1; i += count[0]) + { + for (size_t j = 0; j < DIM2; j += count[1]) + { + start = {i, j}; + Kokkos::View selData( + "selBuffer", C1, C2); + Kokkos::parallel_for( + "createSelBuffer", Kokkos::MDRangePolicy>({0, 0}, {C1, C2}), + KOKKOS_LAMBDA(int x, int y) { selData(x, y) = inputData(i + x, j + y); }); + Kokkos::fence(); + var.SetSelection({start, count}); + var.SetMemorySpace(adiosMemSpace); + engine.Put(var, selData, adios2::Mode::Sync); + } + } + + engine.EndStep(); + engine.Close(); + } + { // read + adios2::ADIOS adios; + adios2::IO ioRead = adios.DeclareIO("TestIORead"); + ioRead.SetEngine("BP5"); + if (!engineName.empty()) + { + ioRead.SetEngine(engineName); + } + adios2::Engine engine = ioRead.Open(filename, adios2::Mode::Read); + EXPECT_TRUE(engine); + engine.BeginStep(); + adios2::Variable var = ioRead.InquireVariable("selDouble"); + EXPECT_TRUE(var); + const adios2::Dims shape = {DIM1, DIM2}; + const adios2::Dims count = {C1, C2}; + adios2::Dims s{0, 0}; + adios2::Dims c = shape; + adios2::Dims firstNonMatch{0, 0}; + + /* Entire array */ + { + Kokkos::View selOutData( + "selOutBuffer", DIM1, DIM2); + var.SetSelection({s, c}); + var.SetMemorySpace(adiosMemSpace); + engine.Get(var, selOutData, adios2::Mode::Sync); + EXPECT_TRUE(compareSelection2D(inputData, shape, s, c, selOutData)); + } + /* Single block in the center */ + { + s = {5, 4}; + c = count; + Kokkos::View selOutData( + "selOutBuffer", c[0], c[1]); + var.SetSelection({s, c}); + var.SetMemorySpace(adiosMemSpace); + engine.Get(var, selOutData, adios2::Mode::Sync); + EXPECT_TRUE(compareSelection2D(inputData, shape, s, c, selOutData)); + } + /* Four blocks in X-Y direction */ + { + s = {5, 4}; + c = {2 * count[0], 2 * count[1]}; + Kokkos::View selOutData( + "selOutBuffer", c[0], c[1]); + var.SetSelection({s, c}); + var.SetMemorySpace(adiosMemSpace); + engine.Get(var, selOutData, adios2::Mode::Sync); + EXPECT_TRUE(compareSelection2D(inputData, shape, s, c, selOutData)); + } + /* Partial blocks : center part of single block in center */ + { + s = {6, 5}; + c = {count[0] - 2, count[1] - 2}; + Kokkos::View selOutData( + "selOutBuffer", c[0], c[1]); + var.SetSelection({s, c}); + var.SetMemorySpace(adiosMemSpace); + engine.Get(var, selOutData, adios2::Mode::Sync); + EXPECT_TRUE(compareSelection2D(inputData, shape, s, c, selOutData)); + } + } +} + +class BPWRKokkos : public ::testing::TestWithParam +{ +public: + BPWRKokkos() = default; + + virtual void SetUp() {} + virtual void TearDown() {} +}; + +TEST_P(BPWRKokkos, ADIOS2BPKokkosDetect) { KokkosDetectMemSpace(GetParam()); } +TEST_P(BPWRKokkos, ADIOS2BPKokkosMemSel) { KokkosWriteReadMemorySelection(); } +TEST_P(BPWRKokkos, ADIOS2BPWRKokkos2D) { KokkosWriteReadMPI2D(); } +TEST_P(BPWRKokkos, ADIOS2BPWRKokkosSel2D) { KokkosWriteReadSelection2D(); } + +INSTANTIATE_TEST_SUITE_P(KokkosRW, BPWRKokkos, ::testing::Values("deferred", "sync")); + +int main(int argc, char **argv) +{ +#if ADIOS2_USE_MPI + int provided; + + // MPI_THREAD_MULTIPLE is only required if you enable the SST MPI_DP + MPI_Init_thread(nullptr, nullptr, MPI_THREAD_MULTIPLE, &provided); +#endif + + int result; + ::testing::InitGoogleTest(&argc, argv); + if (argc > 1) + { + engineName = std::string(argv[1]); + } + result = RUN_ALL_TESTS(); + +#if ADIOS2_USE_MPI + MPI_Finalize(); +#endif + + return result; +} From b8ffbb91b7c9a011ed2b9fc284ee1c3494569347 Mon Sep 17 00:00:00 2001 From: anagainaru Date: Sat, 14 Sep 2024 14:33:57 -0400 Subject: [PATCH 2/2] Remove the CUDA backend testing --- testing/adios2/engine/bp/CMakeLists.txt | 25 - .../adios2/engine/bp/TestBPWriteReadCuda.cpp | 462 ------------------ 2 files changed, 487 deletions(-) delete mode 100644 testing/adios2/engine/bp/TestBPWriteReadCuda.cpp diff --git a/testing/adios2/engine/bp/CMakeLists.txt b/testing/adios2/engine/bp/CMakeLists.txt index 7c2a6e1627..b17438bdfb 100644 --- a/testing/adios2/engine/bp/CMakeLists.txt +++ b/testing/adios2/engine/bp/CMakeLists.txt @@ -288,28 +288,3 @@ if(ADIOS2_HAVE_Kokkos) target_link_libraries(${tgt} Kokkos::kokkos) endforeach() endif() - -if(ADIOS2_HAVE_CUDA OR ADIOS2_HAVE_Kokkos_CUDA) - gtest_add_tests_helper(WriteReadCuda MPI_ALLOW BP Engine.BP. .BP4 - WORKING_DIRECTORY ${BP4_DIR} EXTRA_ARGS "BP4" - ) - gtest_add_tests_helper(SelectionsCuda MPI_ALLOW BP Engine.BP. .BP4 - WORKING_DIRECTORY ${BP4_DIR} EXTRA_ARGS "BP4" - ) - gtest_add_tests_helper(WriteReadCuda MPI_ALLOW BP Engine.BP. .BP5 - WORKING_DIRECTORY ${BP5_DIR} EXTRA_ARGS "BP5" - ) - gtest_add_tests_helper(SelectionsCuda MPI_ALLOW BP Engine.BP. .BP5 - WORKING_DIRECTORY ${BP5_DIR} EXTRA_ARGS "BP5" - ) - - foreach(tgt ${Test.Engine.BP.WriteReadCuda-TARGETS}) - target_sources(${tgt} PRIVATE operations/CudaRoutines.cu) - target_link_libraries(${tgt} CUDA::cudart) - endforeach() - - foreach(tgt ${Test.Engine.BP.SelectionsCuda-TARGETS}) - target_sources(${tgt} PRIVATE operations/CudaRoutines.cu) - target_link_libraries(${tgt} CUDA::cudart) - endforeach() -endif() diff --git a/testing/adios2/engine/bp/TestBPWriteReadCuda.cpp b/testing/adios2/engine/bp/TestBPWriteReadCuda.cpp deleted file mode 100644 index 880c55de35..0000000000 --- a/testing/adios2/engine/bp/TestBPWriteReadCuda.cpp +++ /dev/null @@ -1,462 +0,0 @@ -/* - * Distributed under the OSI-approved Apache License, Version 2.0. See - * accompanying file Copyright.txt for details. - */ - -#include "operations/CudaRoutines.h" - -#include - -#include -#include -#include -#include -#include //std::iota - -std::string engineName; // comes from command line - -const float EPSILON = std::numeric_limits::epsilon(); -const float INCREMENT = 10.0f; - -void CUDAWrongMemSpace() -{ - const std::string fname("BPWRCUFail.bp"); - const size_t Nx = 5; - - adios2::ADIOS adios; - std::vector r32s(Nx, .0f); - std::iota(r32s.begin(), r32s.end(), .0f); - { // write - adios2::IO io = adios.DeclareIO("TestIO"); - const adios2::Dims shape{Nx}; - const adios2::Dims start{0}; - const adios2::Dims count{Nx}; - auto var_r32 = io.DefineVariable("r32", shape, start, count); - auto var_r32_cpu = io.DefineVariable("r32cpu", shape, start, count); - - float *gpuSimData = nullptr; - cudaMalloc(&gpuSimData, Nx * sizeof(float)); - cudaMemcpy(gpuSimData, (float *)&r32s[0], Nx * sizeof(float), cudaMemcpyHostToDevice); - - io.SetEngine("BP5"); - if (!engineName.empty()) - { - io.SetEngine(engineName); - } - adios2::Engine bpWriter = io.Open(fname, adios2::Mode::Write); - - bpWriter.BeginStep(); - var_r32.SetMemorySpace(adios2::MemorySpace::Host); - EXPECT_DEATH(bpWriter.Put(var_r32, gpuSimData), ""); - var_r32_cpu.SetMemorySpace(adios2::MemorySpace::GPU); - bpWriter.Put(var_r32_cpu, r32s.data()); - bpWriter.EndStep(); - - bpWriter.Close(); - } - { // read - adios2::IO io = adios.DeclareIO("ReadIO"); - io.SetEngine("BP5"); - if (!engineName.empty()) - { - io.SetEngine(engineName); - } - - adios2::Engine bpReader = io.Open(fname, adios2::Mode::Read); - - bpReader.BeginStep(); - auto var_r32 = io.InquireVariable("r32cpu"); - EXPECT_TRUE(var_r32); - - std::vector r32o(Nx); - float *gpuSimData; - cudaMalloc(&gpuSimData, Nx * sizeof(float)); - var_r32.SetMemorySpace(adios2::MemorySpace::Host); - EXPECT_THROW(bpReader.Get(var_r32, gpuSimData, adios2::Mode::Sync), std::ios_base::failure); - var_r32.SetMemorySpace(adios2::MemorySpace::GPU); - EXPECT_THROW(bpReader.Get(var_r32, r32o.data(), adios2::Mode::Sync), - std::ios_base::failure); - // bpReader.EndStep(); - // bpReader.Close(); - } -} - -void CUDADetectMemSpace(const std::string mode) -{ - const std::string fname("BPWRCUDetect" + mode + ".bp"); - adios2::Mode ioMode = adios2::Mode::Deferred; - if (mode == "Sync") - ioMode = adios2::Mode::Sync; - - // Number of rows - const size_t Nx = 5; - // Number of columns - const size_t Ny = 2; - const size_t NTotal = Nx * Ny; - // Number of steps - const size_t NSteps = 10; - - adios2::ADIOS adios; - // simulation data - std::vector r32s(NTotal, .0f); - std::iota(r32s.begin(), r32s.end(), .0f); - - { // write - adios2::IO io = adios.DeclareIO("TestIO"); - const adios2::Dims shape{Ny, Nx}; - const adios2::Dims start{0, 0}; - const adios2::Dims count{Ny, Nx}; - auto var_r32 = io.DefineVariable("r32", shape, start, count); - EXPECT_TRUE(var_r32); - - float *gpuSimData = nullptr; - cudaMalloc(&gpuSimData, NTotal * sizeof(float)); - cudaMemcpy(gpuSimData, (float *)&r32s[0], NTotal * sizeof(float), cudaMemcpyHostToDevice); - - io.SetEngine("BP5"); - - if (!engineName.empty()) - { - io.SetEngine(engineName); - } - adios2::Engine bpWriter = io.Open(fname, adios2::Mode::Write); - - for (size_t step = 0; step < NSteps; ++step) - { - // Update values in the simulation data - cuda_increment(NTotal, 1, 0, gpuSimData, INCREMENT); - std::transform(r32s.begin(), r32s.end(), r32s.begin(), - std::bind(std::plus(), std::placeholders::_1, INCREMENT)); - - bpWriter.BeginStep(); - if (step % 2 == 0) - bpWriter.Put(var_r32, gpuSimData, ioMode); - else - bpWriter.Put(var_r32, r32s.data(), ioMode); - bpWriter.EndStep(); - } - - bpWriter.Close(); - } - // reset the initial data - std::iota(r32s.begin(), r32s.end(), .0f); - { // read - adios2::IO io = adios.DeclareIO("ReadIO"); - io.SetEngine("BP5"); - - if (!engineName.empty()) - { - io.SetEngine(engineName); - } - - adios2::Engine bpReader = io.Open(fname, adios2::Mode::Read); - - unsigned int t = 0; - for (; bpReader.BeginStep() == adios2::StepStatus::OK; ++t) - { - auto var_r32 = io.InquireVariable("r32"); - EXPECT_TRUE(var_r32); - ASSERT_EQ(var_r32.ShapeID(), adios2::ShapeID::GlobalArray); - ASSERT_EQ(var_r32.Shape()[0], Ny); - ASSERT_EQ(var_r32.Shape()[1], Nx); - - std::vector r32o(NTotal); - float *gpuSimData; - cudaMalloc(&gpuSimData, NTotal * sizeof(float)); - if (t % 2 == 0) - { - bpReader.Get(var_r32, r32o.data(), ioMode); - bpReader.EndStep(); - } - else - { - bpReader.Get(var_r32, gpuSimData, ioMode); - bpReader.EndStep(); - cudaMemcpy(r32o.data(), gpuSimData, NTotal * sizeof(float), cudaMemcpyDeviceToHost); - } - // Remove INCREMENT from each element - std::transform( - r32o.begin(), r32o.end(), r32o.begin(), - std::bind(std::minus(), std::placeholders::_1, (t + 1) * INCREMENT)); - - for (size_t i = 0; i < NTotal; i++) - { - char msg[1 << 8] = {0}; - snprintf(msg, sizeof(msg), "t=%d i=%zu r32o=%f r32s=%f", t, i, r32o[i], r32s[i]); - ASSERT_LT(std::abs(r32o[i] - r32s[i]), EPSILON) << msg; - } - } - EXPECT_EQ(t, NSteps); - - bpReader.Close(); - } -} - -void CUDAWriteReadMemorySelection() -{ - const std::string fname("BPWRCUSel1D.bp"); - const size_t Nx = 10; - const size_t NSteps = 2; - const size_t ghostCells = 1; - std::vector r32s(Nx + 2 * ghostCells); - std::iota(r32s.begin(), r32s.end(), .0f); - - adios2::ADIOS adios; - { - // cuda simulation buffer - float *gpuSimData = nullptr; - cudaMalloc(&gpuSimData, (Nx + 2 * ghostCells) * sizeof(float)); - cudaMemcpy(gpuSimData, r32s.data(), (Nx + 2 * ghostCells) * sizeof(float), - cudaMemcpyHostToDevice); - - adios2::IO io = adios.DeclareIO("TestIO"); - io.SetEngine("BP5"); - if (!engineName.empty()) - { - io.SetEngine(engineName); - } - - const adios2::Dims shape{static_cast(Nx)}; - const adios2::Dims start{static_cast(0)}; - const adios2::Dims count{Nx}; - auto var_r32 = io.DefineVariable("r32", shape, start, count); - - const adios2::Dims memoryStart = {ghostCells}; - const adios2::Dims memoryCount = {Nx + 2 * ghostCells}; - var_r32.SetMemorySelection({memoryStart, memoryCount}); - - adios2::Engine bpWriter = io.Open(fname, adios2::Mode::Write); - - for (size_t step = 0; step < NSteps; ++step) - { - cuda_increment(Nx + 2 * ghostCells, 1, 0, gpuSimData, INCREMENT); - - bpWriter.BeginStep(); - var_r32.SetMemorySpace(adios2::MemorySpace::GPU); - bpWriter.Put(var_r32, gpuSimData); - bpWriter.EndStep(); - } - - bpWriter.Close(); - } - { - // remove ghost cells from the input vector when checking correctness - r32s.erase(r32s.begin(), r32s.begin() + ghostCells); - r32s.erase(r32s.end() - ghostCells, r32s.end()); - - adios2::IO io = adios.DeclareIO("ReadIO"); - io.SetEngine("BP5"); - if (!engineName.empty()) - { - io.SetEngine(engineName); - } - - adios2::Engine bpReader = io.Open(fname, adios2::Mode::Read); - - unsigned int t = 0; - for (; bpReader.BeginStep() == adios2::StepStatus::OK; ++t) - { - auto var_r32 = io.InquireVariable("r32"); - EXPECT_TRUE(var_r32); - ASSERT_EQ(var_r32.ShapeID(), adios2::ShapeID::GlobalArray); - ASSERT_EQ(var_r32.Shape()[0], Nx); - - auto mmR32 = std::minmax_element(r32s.begin(), r32s.end()); - EXPECT_EQ(var_r32.Min() - (t + 1) * INCREMENT, *mmR32.first); - EXPECT_EQ(var_r32.Max() - (t + 1) * INCREMENT, *mmR32.second); - - std::vector r32o(Nx); - float *gpuSimData; - cudaMalloc(&gpuSimData, Nx * sizeof(float)); - var_r32.SetMemorySpace(adios2::MemorySpace::GPU); - bpReader.Get(var_r32, gpuSimData); - bpReader.EndStep(); - cudaMemcpy(r32o.data(), gpuSimData, Nx * sizeof(float), cudaMemcpyDeviceToHost); - - // Remove INCREMENT from each element - std::transform( - r32o.begin(), r32o.end(), r32o.begin(), - std::bind(std::minus(), std::placeholders::_1, (t + 1) * INCREMENT)); - for (size_t i = 0; i < Nx; i++) - { - char msg[1 << 8] = {0}; - snprintf(msg, sizeof(msg), "t=%d i=%zu r32o=%f r32s=%f", t, i, r32o[i], r32s[i]); - ASSERT_LT(std::abs(r32o[i] - r32s[i]), EPSILON) << msg; - } - } - EXPECT_EQ(t, NSteps); - - bpReader.Close(); - } -} - -void CUDAWriteReadMPI1D(const std::string mode) -{ - adios2::Mode ioMode = adios2::Mode::Deferred; - if (mode == "Sync") - ioMode = adios2::Mode::Sync; - - // Number of rows - const size_t Nx = 100; - // Number of steps - const size_t NSteps = 10; - - int mpiRank = 0, mpiSize = 1; -#if ADIOS2_USE_MPI - MPI_Comm_rank(MPI_COMM_WORLD, &mpiRank); - MPI_Comm_size(MPI_COMM_WORLD, &mpiSize); - const std::string fname("BPWRCU1D_" + mode + "_MPI.bp"); -#else - const std::string fname("BPWRCU1D_" + mode + ".bp"); -#endif - -#if ADIOS2_USE_MPI - adios2::ADIOS adios(MPI_COMM_WORLD); -#else - adios2::ADIOS adios; -#endif - - const size_t NxTotal = Nx * mpiSize; - - // Initialize the simulation data - std::vector r32s(NxTotal, .0f); - std::iota(r32s.begin(), r32s.end(), .0f); - - { - // cuda simulation buffer - float *gpuSimData = nullptr; - cudaMalloc(&gpuSimData, Nx * sizeof(float)); - cudaMemcpy(gpuSimData, ((float *)&r32s[0] + (Nx * mpiRank)), Nx * sizeof(float), - cudaMemcpyHostToDevice); - // host simulation buffer - std::vector simData(r32s.begin() + (Nx * mpiRank), - r32s.begin() + (Nx * (mpiRank + 1))); - - adios2::IO io = adios.DeclareIO("TestIO"); - io.SetEngine("BP5"); - - if (!engineName.empty()) - { - io.SetEngine(engineName); - } - - const adios2::Dims shape{static_cast(NxTotal)}; - const adios2::Dims start{static_cast(Nx * mpiRank)}; - const adios2::Dims count{Nx}; - - auto var_r32 = io.DefineVariable("r32", shape, start, count); - auto var_r32_host = io.DefineVariable("r32host", shape, start, count); - - adios2::Engine bpWriter = io.Open(fname, adios2::Mode::Write); - - for (size_t step = 0; step < NSteps; ++step) - { - // Update values in the simulation data - cuda_increment(Nx, 1, 0, gpuSimData, INCREMENT); - std::transform(simData.begin(), simData.end(), simData.begin(), - std::bind(std::plus(), std::placeholders::_1, INCREMENT)); - - bpWriter.BeginStep(); - var_r32.SetMemorySpace(adios2::MemorySpace::GPU); - bpWriter.Put(var_r32, gpuSimData, ioMode); - var_r32_host.SetMemorySpace(adios2::MemorySpace::Host); - bpWriter.Put(var_r32_host, simData.data(), ioMode); - bpWriter.EndStep(); - } - - bpWriter.Close(); - } - -#if ADIOS2_USE_MPI - MPI_Barrier(MPI_COMM_WORLD); -#endif - - { - adios2::IO io = adios.DeclareIO("ReadIO"); - io.SetEngine("BP5"); - - if (!engineName.empty()) - { - io.SetEngine(engineName); - } - - adios2::Engine bpReader = io.Open(fname, adios2::Mode::Read); - - unsigned int t = 0; - for (; bpReader.BeginStep() == adios2::StepStatus::OK; ++t) - { - auto var_r32 = io.InquireVariable("r32"); - EXPECT_TRUE(var_r32); - ASSERT_EQ(var_r32.ShapeID(), adios2::ShapeID::GlobalArray); - ASSERT_EQ(var_r32.Shape()[0], NxTotal); - - auto mmR32 = std::minmax_element(r32s.begin(), r32s.end()); - EXPECT_EQ(var_r32.Min() - (t + 1) * INCREMENT, *mmR32.first); - EXPECT_EQ(var_r32.Max() - (t + 1) * INCREMENT, *mmR32.second); - - std::vector r32o(NxTotal); - float *gpuSimData; - cudaMalloc(&gpuSimData, NxTotal * sizeof(float)); - var_r32.SetMemorySpace(adios2::MemorySpace::GPU); - bpReader.Get(var_r32, gpuSimData, ioMode); - bpReader.EndStep(); - cudaMemcpy(r32o.data(), gpuSimData, NxTotal * sizeof(float), cudaMemcpyDeviceToHost); - - // Remove INCREMENT from each element - std::transform( - r32o.begin(), r32o.end(), r32o.begin(), - std::bind(std::minus(), std::placeholders::_1, (t + 1) * INCREMENT)); - - for (size_t i = 0; i < NxTotal; i++) - { - char msg[1 << 8] = {0}; - snprintf(msg, sizeof(msg), "t=%d i=%zu rank=%d r32o=%f r32s=%f", t, i, mpiRank, - r32o[i], r32s[i]); - ASSERT_LT(std::abs(r32o[i] - r32s[i]), EPSILON) << msg; - } - } - EXPECT_EQ(t, NSteps); - - bpReader.Close(); - } -} - -class BPWRCUDA : public ::testing::TestWithParam -{ -public: - BPWRCUDA() = default; - - virtual void SetUp() {} - virtual void TearDown() {} -}; - -TEST_P(BPWRCUDA, ADIOS2BPWRCUDA1D) { CUDAWriteReadMPI1D(GetParam()); } -TEST_P(BPWRCUDA, ADIOS2BPCUDADetect) { CUDADetectMemSpace(GetParam()); } -TEST_P(BPWRCUDA, ADIOS2BPCUDAWrong) { CUDAWrongMemSpace(); } -TEST_P(BPWRCUDA, ADIOS2BPCUDAMemSel) { CUDAWriteReadMemorySelection(); } - -INSTANTIATE_TEST_SUITE_P(CudaRW, BPWRCUDA, ::testing::Values("deferred", "sync")); - -int main(int argc, char **argv) -{ -#if ADIOS2_USE_MPI - int provided; - - // MPI_THREAD_MULTIPLE is only required if you enable the SST MPI_DP - MPI_Init_thread(nullptr, nullptr, MPI_THREAD_MULTIPLE, &provided); -#endif - - int result; - ::testing::InitGoogleTest(&argc, argv); - if (argc > 1) - { - engineName = std::string(argv[1]); - } - result = RUN_ALL_TESTS(); - -#if ADIOS2_USE_MPI - MPI_Finalize(); -#endif - - return result; -}