From 9cd7e1a99c290a8f69df5e14f0615e09439cb95f Mon Sep 17 00:00:00 2001 From: Tianyu Liu Date: Tue, 24 Sep 2024 11:15:38 -0400 Subject: [PATCH] Apply event raii wrapper --- cpp/CMakeLists.txt | 1 + cpp/include/cudf/utilities/cuda_event.hpp | 31 +++++++++++++++++++ cpp/src/interop/to_arrow_device.cu | 5 ++- cpp/src/io/text/bgzip_data_chunk_source.cu | 4 +-- .../io/text/data_chunk_source_factories.cpp | 6 ++-- cpp/src/io/text/multibyte_split.cu | 6 ++-- cpp/src/utilities/cuda_event.cpp | 14 +++++++++ cpp/src/utilities/stream_pool.cpp | 21 +------------ 8 files changed, 55 insertions(+), 33 deletions(-) create mode 100644 cpp/include/cudf/utilities/cuda_event.hpp create mode 100644 cpp/src/utilities/cuda_event.cpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 136f43ee706..1d0fd9f9a99 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -659,6 +659,7 @@ add_library( src/unary/nan_ops.cu src/unary/null_ops.cu src/utilities/cuda.cpp + src/utilities/cuda_event.cpp src/utilities/cuda_memcpy.cu src/utilities/default_stream.cpp src/utilities/host_memory.cpp diff --git a/cpp/include/cudf/utilities/cuda_event.hpp b/cpp/include/cudf/utilities/cuda_event.hpp new file mode 100644 index 00000000000..f8eb09ed230 --- /dev/null +++ b/cpp/include/cudf/utilities/cuda_event.hpp @@ -0,0 +1,31 @@ +#pragma once + +#include + +#include + +namespace CUDF_EXPORT cudf { + +namespace detail { +/** + * @brief RAII struct to wrap a cuda event and ensure its proper destruction. + */ +struct cuda_event { + cuda_event(); + virtual ~cuda_event(); + + // Moveable but not copyable. + cuda_event(const cuda_event&) = delete; + cuda_event& operator=(const cuda_event&) = delete; + + cuda_event(cuda_event&&) = default; + cuda_event& operator=(cuda_event&&) = default; + + operator cudaEvent_t() const; + + private: + cudaEvent_t e_; +}; +} // namespace detail + +} // namespace CUDF_EXPORT cudf diff --git a/cpp/src/interop/to_arrow_device.cu b/cpp/src/interop/to_arrow_device.cu index a2874b46b06..1829b8e2d8f 100644 --- a/cpp/src/interop/to_arrow_device.cu +++ b/cpp/src/interop/to_arrow_device.cu @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -559,13 +560,12 @@ int dispatch_to_arrow_device_view::operator()(ArrowArray* ou struct ArrowDeviceArrayPrivateData { ArrowArray parent; - cudaEvent_t sync_event; + cudf::detail::cuda_event sync_event; }; void ArrowDeviceArrayRelease(ArrowArray* array) { auto private_data = reinterpret_cast(array->private_data); - RMM_ASSERT_CUDA_SUCCESS(cudaEventDestroy(private_data->sync_event)); ArrowArrayRelease(&private_data->parent); delete private_data; array->release = nullptr; @@ -578,7 +578,6 @@ unique_device_array_t create_device_array(nanoarrow::UniqueArray&& out, ArrowArrayFinishBuilding(out.get(), NANOARROW_VALIDATION_LEVEL_MINIMAL, nullptr)); auto private_data = std::make_unique(); - CUDF_CUDA_TRY(cudaEventCreate(&private_data->sync_event)); CUDF_CUDA_TRY(cudaEventRecord(private_data->sync_event, stream.value())); ArrowArrayMove(out.get(), &private_data->parent); diff --git a/cpp/src/io/text/bgzip_data_chunk_source.cu b/cpp/src/io/text/bgzip_data_chunk_source.cu index badcd3f58f9..56df6fdd7ab 100644 --- a/cpp/src/io/text/bgzip_data_chunk_source.cu +++ b/cpp/src/io/text/bgzip_data_chunk_source.cu @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -84,7 +85,7 @@ class bgzip_data_chunk_reader : public data_chunk_reader { static constexpr std::size_t default_offset_alloc = 1 << 16; // 64k offset allocation, resized on demand - cudaEvent_t event; + cudf::detail::cuda_event event; cudf::detail::host_vector h_compressed_blocks; cudf::detail::host_vector h_compressed_offsets; cudf::detail::host_vector h_decompressed_offsets; @@ -115,7 +116,6 @@ class bgzip_data_chunk_reader : public data_chunk_reader { d_decompressed_spans(0, init_stream), d_decompression_results(0, init_stream) { - CUDF_CUDA_TRY(cudaEventCreate(&event)); h_compressed_blocks.reserve(default_buffer_alloc); h_compressed_offsets.reserve(default_offset_alloc); h_compressed_offsets.push_back(0); diff --git a/cpp/src/io/text/data_chunk_source_factories.cpp b/cpp/src/io/text/data_chunk_source_factories.cpp index 58faa0ebfe4..11f8392c274 100644 --- a/cpp/src/io/text/data_chunk_source_factories.cpp +++ b/cpp/src/io/text/data_chunk_source_factories.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include @@ -33,15 +34,12 @@ namespace cudf::io::text { namespace { struct host_ticket { - cudaEvent_t event{}; // tracks the completion of the last device-to-host copy. + cudf::detail::cuda_event event; // tracks the completion of the last device-to-host copy. cudf::detail::host_vector buffer; host_ticket() : buffer{cudf::detail::make_pinned_vector_sync(0, cudf::get_default_stream())} { - cudaEventCreate(&event); } - - ~host_ticket() { cudaEventDestroy(event); } }; /** diff --git a/cpp/src/io/text/multibyte_split.cu b/cpp/src/io/text/multibyte_split.cu index 028f922bec3..cc728ab7d59 100644 --- a/cpp/src/io/text/multibyte_split.cu +++ b/cpp/src/io/text/multibyte_split.cu @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -383,8 +384,7 @@ std::unique_ptr multibyte_split(cudf::io::text::data_chunk_source auto streams = cudf::detail::fork_streams(stream, concurrency); - cudaEvent_t last_launch_event; - CUDF_CUDA_TRY(cudaEventCreate(&last_launch_event)); + cudf::detail::cuda_event last_launch_event; auto& read_stream = streams[0]; auto& scan_stream = streams[1]; @@ -499,8 +499,6 @@ std::unique_ptr multibyte_split(cudf::io::text::data_chunk_source chunk = std::move(next_chunk); } - CUDF_CUDA_TRY(cudaEventDestroy(last_launch_event)); - cudf::detail::join_streams(streams, stream); auto chars = char_storage.gather(stream, mr); diff --git a/cpp/src/utilities/cuda_event.cpp b/cpp/src/utilities/cuda_event.cpp new file mode 100644 index 00000000000..3310c0efac9 --- /dev/null +++ b/cpp/src/utilities/cuda_event.cpp @@ -0,0 +1,14 @@ +#include +#include + +#include + +namespace cudf::detail { + +cuda_event::cuda_event() { CUDF_CUDA_TRY(cudaEventCreateWithFlags(&e_, cudaEventDisableTiming)); } + +cuda_event::~cuda_event() { RMM_ASSERT_CUDA_SUCCESS(cudaEventDestroy(e_)); } + +cuda_event::operator cudaEvent_t() const { return e_; } + +} // namespace cudf::detail diff --git a/cpp/src/utilities/stream_pool.cpp b/cpp/src/utilities/stream_pool.cpp index 8c29182bfb5..953253905fb 100644 --- a/cpp/src/utilities/stream_pool.cpp +++ b/cpp/src/utilities/stream_pool.cpp @@ -16,6 +16,7 @@ #include #include +#include #include #include @@ -125,26 +126,6 @@ rmm::cuda_device_id get_current_cuda_device() return rmm::cuda_device_id{device_id}; } -/** - * @brief RAII struct to wrap a cuda event and ensure its proper destruction. - */ -struct cuda_event { - cuda_event() { CUDF_CUDA_TRY(cudaEventCreateWithFlags(&e_, cudaEventDisableTiming)); } - virtual ~cuda_event() { CUDF_ASSERT_CUDA_SUCCESS(cudaEventDestroy(e_)); } - - // Moveable but not copyable. - cuda_event(const cuda_event&) = delete; - cuda_event& operator=(const cuda_event&) = delete; - - cuda_event(cuda_event&&) = default; - cuda_event& operator=(cuda_event&&) = default; - - operator cudaEvent_t() { return e_; } - - private: - cudaEvent_t e_; -}; - /** * @brief Returns a cudaEvent_t for the current thread. *