diff --git a/cpp/src/wholememory_ops/scatter_op_impl_mapped.cu b/cpp/src/wholememory_ops/scatter_op_impl_mapped.cu index 77f570f90..2d2bef81e 100644 --- a/cpp/src/wholememory_ops/scatter_op_impl_mapped.cu +++ b/cpp/src/wholememory_ops/scatter_op_impl_mapped.cu @@ -18,6 +18,7 @@ #include #include +#include "cuda_macros.hpp" #include "wholememory_ops/functions/gather_scatter_func.h" namespace wholememory_ops { @@ -41,6 +42,7 @@ wholememory_error_code_t wholememory_scatter_mapped( wholememory_desc, stream, scatter_sms); + WM_CUDA_CHECK(cudaStreamSynchronize(stream)); } } // namespace wholememory_ops