From 243e12e9f841e906c6c1eed0156c6089337013b6 Mon Sep 17 00:00:00 2001
From: Vukasin Milovanovic
Date: Fri, 27 Sep 2024 19:46:44 -0700
Subject: [PATCH] bounce buffer

---
 cpp/include/cudf/detail/device_scalar.hpp | 32 +++++++++++++++++------
 1 file changed, 24 insertions(+), 8 deletions(-)

diff --git a/cpp/include/cudf/detail/device_scalar.hpp b/cpp/include/cudf/detail/device_scalar.hpp
index c795d30f728..1a925ad1b46 100644
--- a/cpp/include/cudf/detail/device_scalar.hpp
+++ b/cpp/include/cudf/detail/device_scalar.hpp
@@ -43,7 +43,7 @@ class device_scalar : public rmm::device_scalar<T> {
   explicit device_scalar(
     rmm::cuda_stream_view stream,
     rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref())
-    : rmm::device_scalar<T>(stream, mr)
+    : rmm::device_scalar<T>(stream, mr), bounce_buffer{make_host_vector<T>(1, stream)}
   {
   }
 
@@ -51,25 +51,41 @@ class device_scalar : public rmm::device_scalar<T> {
     T const& initial_value,
     rmm::cuda_stream_view stream,
     rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref())
-    : rmm::device_scalar<T>(stream, mr)
+    : rmm::device_scalar<T>(stream, mr), bounce_buffer{make_host_vector<T>(1, stream)}
   {
-    auto bounce_buffer = make_host_vector<T>(1, stream);
-    bounce_buffer[0]   = initial_value;
-    // TODO replace with to_device
-    cuda_memcpy<T>(device_span<T>{this->data(), 1}, bounce_buffer, stream);
+    bounce_buffer[0] = initial_value;
+    cuda_memcpy_async<T>(device_span<T>{this->data(), 1}, bounce_buffer, stream);
   }
 
   device_scalar(device_scalar const& other,
                 rmm::cuda_stream_view stream,
                 rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref())
-    : rmm::device_scalar<T>(other, stream, mr)
+    : rmm::device_scalar<T>(other, stream, mr), bounce_buffer{make_host_vector<T>(1, stream)}
   {
   }
 
   [[nodiscard]] T value(rmm::cuda_stream_view stream) const
   {
-    return make_host_vector_sync<T>(device_span<T const>{this->data(), 1}, stream)[0];
+    cuda_memcpy<T>(bounce_buffer, device_span<T const>{this->data(), 1}, stream);
+    return bounce_buffer[0];
   }
+
+  void set_value_async(T const& value, rmm::cuda_stream_view stream)
+  {
+    bounce_buffer[0] = value;
+    cuda_memcpy_async<T>(device_span<T>{this->data(), 1}, bounce_buffer, stream);
+  }
+
+  void set_value_async(T&& value, rmm::cuda_stream_view stream)
+  {
+    bounce_buffer[0] = std::move(value);
+    cuda_memcpy_async<T>(device_span<T>{this->data(), 1}, bounce_buffer, stream);
+  }
+
+  void set_value_to_zero_async(rmm::cuda_stream_view stream) { set_value_async(T{}, stream); }
+
+ private:
+  mutable cudf::detail::host_vector<T> bounce_buffer;
 };
 
 }  // namespace detail