Skip to content

Commit

Permalink
Merge pull request ornladios#4359 from anagainaru/kokkos-cuda-mismatch
Browse files Browse the repository at this point in the history
Fix the code that wrongly copies GPU buffers from host to device instead of from device to host
  • Loading branch information
anagainaru authored Oct 8, 2024
2 parents 24c3860 + 48049f0 commit 9e38983
Show file tree
Hide file tree
Showing 5 changed files with 23 additions and 18 deletions.
3 changes: 2 additions & 1 deletion source/adios2/engine/bp5/BP5Writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1815,7 +1815,8 @@ void BP5Writer::PutCommon(VariableBase &variable, const void *values, bool sync)
helper::NdCopy((const char *)values, helper::CoreDims(ZeroDims), MemoryCount,
sourceRowMajor, false, (char *)ptr, MemoryStart, varCount, sourceRowMajor,
false, (int)ObjSize, helper::CoreDims(), helper::CoreDims(),
helper::CoreDims(), helper::CoreDims(), false /* safemode */, memSpace);
helper::CoreDims(), helper::CoreDims(), false /* safemode */, memSpace,
/* duringWrite */ true);
}
else
{
Expand Down
4 changes: 2 additions & 2 deletions source/adios2/helper/adiosMemory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ int NdCopy(const char *in, const CoreDims &inStart, const CoreDims &inCount,
const CoreDims &outStart, const CoreDims &outCount, const bool outIsRowMajor,
const bool outIsLittleEndian, const int typeSize, const CoreDims &inMemStart,
const CoreDims &inMemCount, const CoreDims &outMemStart, const CoreDims &outMemCount,
const bool safeMode, MemorySpace MemSpace)
const bool safeMode, const MemorySpace MemSpace, const bool duringWrite)

{

Expand Down Expand Up @@ -439,7 +439,7 @@ int NdCopy(const char *in, const CoreDims &inStart, const CoreDims &inCount,
if (MemSpace == MemorySpace::GPU)
{
helper::NdCopyGPU(inOvlpBase, outOvlpBase, inOvlpGapSize, outOvlpGapSize, ovlpCount,
minContDim, blockSize, MemSpace);
minContDim, blockSize, MemSpace, duringWrite);
return 0;
}
#endif
Expand Down
3 changes: 2 additions & 1 deletion source/adios2/helper/adiosMemory.h
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,8 @@ int NdCopy(const char *in, const CoreDims &inStart, const CoreDims &inCount,
const bool outIsLittleEndian, const int typeSize,
const CoreDims &inMemStart = CoreDims(), const CoreDims &inMemCount = CoreDims(),
const CoreDims &outMemStart = CoreDims(), const CoreDims &outMemCount = CoreDims(),
const bool safeMode = false, MemorySpace MemSpace = MemorySpace::Host);
const bool safeMode = false, const MemorySpace MemSpace = MemorySpace::Host,
const bool duringWrite = false);

template <class T>
size_t PayloadSize(const T *data, const Dims &count) noexcept;
Expand Down
7 changes: 5 additions & 2 deletions source/adios2/helper/adiosMemory.inl
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ void CopyFromBufferToGPU(T *GPUbuffer, size_t position, const char *source, Memo

static inline void NdCopyGPU(const char *&inOvlpBase, char *&outOvlpBase, CoreDims &inOvlpGapSize,
CoreDims &outOvlpGapSize, CoreDims &ovlpCount, size_t minContDim,
size_t blockSize, MemorySpace memSpace)
size_t blockSize, MemorySpace memSpace, bool duringWrite)
{
DimsArray pos(ovlpCount.size(), (size_t)0);
size_t curDim = 0;
Expand All @@ -116,7 +116,10 @@ static inline void NdCopyGPU(const char *&inOvlpBase, char *&outOvlpBase, CoreDi
pos[curDim]++;
curDim++;
}
CopyFromBufferToGPU(outOvlpBase, 0, inOvlpBase, memSpace, blockSize);
if (duringWrite)
CopyFromGPUToBuffer(outOvlpBase, 0, inOvlpBase, memSpace, blockSize);
else
CopyFromBufferToGPU(outOvlpBase, 0, inOvlpBase, memSpace, blockSize);
inOvlpBase += blockSize;
outOvlpBase += blockSize;
do
Expand Down
24 changes: 12 additions & 12 deletions source/adios2/helper/kokkos/adiosKokkos.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,6 @@

namespace
{
void KokkosDeepCopy(const char *src, char *dst, size_t byteCount)
{
using mem_space = Kokkos::DefaultExecutionSpace::memory_space;
Kokkos::View<const char *, mem_space, Kokkos::MemoryTraits<Kokkos::Unmanaged>> srcView(
src, byteCount);
Kokkos::View<char *, Kokkos::HostSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged>> dstView(
dst, byteCount);
Kokkos::deep_copy(dstView, srcView);
}

template <class T>
void KokkosMinMaxImpl(const T *data, const size_t size, T &min, T &max)
{
Expand Down Expand Up @@ -63,12 +53,22 @@ namespace helper
{
/**
 * Copies byteCount bytes from a GPU buffer into a host buffer.
 *
 * The copy is performed exactly once with Kokkos::deep_copy; the earlier
 * additional call to the KokkosDeepCopy helper duplicated this same transfer
 * and has been dropped.
 *
 * @param dst destination buffer, wrapped as a Kokkos::HostSpace view
 * @param GPUbuffer source buffer, wrapped as a view in the default execution
 *                  space's memory space
 * @param byteCount number of bytes to copy
 */
void MemcpyGPUToBuffer(char *dst, const char *GPUbuffer, size_t byteCount)
{
    using mem_space = Kokkos::DefaultExecutionSpace::memory_space;
    // Unmanaged views only wrap the caller's pointers; no allocation or ownership.
    Kokkos::View<const char *, mem_space, Kokkos::MemoryTraits<Kokkos::Unmanaged>> srcView(
        GPUbuffer, byteCount);
    Kokkos::View<char *, Kokkos::HostSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged>> dstView(
        dst, byteCount);
    Kokkos::deep_copy(dstView, srcView);
}

/**
 * Copies byteCount bytes from a host buffer into a GPU buffer.
 *
 * The source is wrapped as a Kokkos::HostSpace view and the destination as a
 * view in the default execution space's memory space, so the memory spaces
 * match the actual location of each pointer. The previous call to the
 * KokkosDeepCopy helper is removed: that helper always treats its source as
 * execution-space memory and its destination as host memory, which is the
 * opposite of what this function needs.
 *
 * @param GPUbuffer destination buffer, wrapped as a view in the default
 *                  execution space's memory space
 * @param src source buffer, wrapped as a Kokkos::HostSpace view
 * @param byteCount number of bytes to copy
 */
void MemcpyBufferToGPU(char *GPUbuffer, const char *src, size_t byteCount)
{
    using mem_space = Kokkos::DefaultExecutionSpace::memory_space;
    // Unmanaged views only wrap the caller's pointers; no allocation or ownership.
    Kokkos::View<const char *, Kokkos::HostSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged>> srcView(
        src, byteCount);
    Kokkos::View<char *, mem_space, Kokkos::MemoryTraits<Kokkos::Unmanaged>> dstView(GPUbuffer,
                                                                                     byteCount);
    Kokkos::deep_copy(dstView, srcView);
}

bool IsGPUbuffer(const void *ptr)
Expand Down

0 comments on commit 9e38983

Please sign in to comment.