-
Notifications
You must be signed in to change notification settings - Fork 920
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Kernel copy for pinned memory (#15934)
Issue #15620 Added an API that enables users to set the threshold under which we perform pinned memory copies using a kernel. The default threshold is zero, so there's no change in default behavior. The API currently only impacts `hostdevice_vector` H<->D synchronization. The PR adds wrappers for `cudaMemcpyAsync` so we can implement configurable behavior for pageable copies as well (e.g. copy to pinned + kernel copy). Authors: - Vukasin Milovanovic (https://github.com/vuule) Approvers: - Bradley Dice (https://github.com/bdice) - Vyas Ramasubramani (https://github.com/vyasr) - Mark Harris (https://github.com/harrism) URL: #15934
- Loading branch information
Showing
6 changed files
with
160 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
/* | ||
* Copyright (c) 2024, NVIDIA CORPORATION. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
#pragma once | ||
|
||
#include <rmm/cuda_stream_view.hpp> | ||
|
||
namespace cudf::detail { | ||
|
||
enum class host_memory_kind : uint8_t { PINNED, PAGEABLE }; | ||
|
||
/** | ||
* @brief Asynchronously copies data between the host and device. | ||
* | ||
* Implementation may use different strategies depending on the size and type of host data. | ||
* | ||
* @param dst Destination memory address | ||
* @param src Source memory address | ||
* @param size Number of bytes to copy | ||
* @param kind Type of host memory | ||
* @param stream CUDA stream used for the copy | ||
*/ | ||
void cuda_memcpy_async( | ||
void* dst, void const* src, size_t size, host_memory_kind kind, rmm::cuda_stream_view stream); | ||
|
||
/** | ||
* @brief Synchronously copies data between the host and device. | ||
* | ||
* Implementation may use different strategies depending on the size and type of host data. | ||
* | ||
* @param dst Destination memory address | ||
* @param src Source memory address | ||
* @param size Number of bytes to copy | ||
* @param kind Type of host memory | ||
* @param stream CUDA stream used for the copy | ||
*/ | ||
void cuda_memcpy( | ||
void* dst, void const* src, size_t size, host_memory_kind kind, rmm::cuda_stream_view stream); | ||
|
||
} // namespace cudf::detail |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
/* | ||
* Copyright (c) 2024, NVIDIA CORPORATION. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
#include <cudf/detail/utilities/cuda_memcpy.hpp> | ||
#include <cudf/utilities/error.hpp> | ||
#include <cudf/utilities/pinned_memory.hpp> | ||
|
||
#include <rmm/exec_policy.hpp> | ||
|
||
#include <thrust/copy.h> | ||
|
||
namespace cudf::detail { | ||
|
||
namespace { | ||
|
||
void copy_pinned(void* dst, void const* src, std::size_t size, rmm::cuda_stream_view stream) | ||
{ | ||
if (size == 0) return; | ||
|
||
if (size < get_kernel_pinned_copy_threshold()) { | ||
thrust::copy_n(rmm::exec_policy_nosync(stream), | ||
static_cast<const char*>(src), | ||
size, | ||
static_cast<char*>(dst)); | ||
} else { | ||
CUDF_CUDA_TRY(cudaMemcpyAsync(dst, src, size, cudaMemcpyDefault, stream)); | ||
} | ||
} | ||
|
||
void copy_pageable(void* dst, void const* src, std::size_t size, rmm::cuda_stream_view stream) | ||
{ | ||
if (size == 0) return; | ||
|
||
CUDF_CUDA_TRY(cudaMemcpyAsync(dst, src, size, cudaMemcpyDefault, stream)); | ||
} | ||
|
||
}; // namespace | ||
|
||
void cuda_memcpy_async( | ||
void* dst, void const* src, size_t size, host_memory_kind kind, rmm::cuda_stream_view stream) | ||
{ | ||
if (kind == host_memory_kind::PINNED) { | ||
copy_pinned(dst, src, size, stream); | ||
} else if (kind == host_memory_kind::PAGEABLE) { | ||
copy_pageable(dst, src, size, stream); | ||
} else { | ||
CUDF_FAIL("Unsupported host memory kind"); | ||
} | ||
} | ||
|
||
void cuda_memcpy( | ||
void* dst, void const* src, size_t size, host_memory_kind kind, rmm::cuda_stream_view stream) | ||
{ | ||
cuda_memcpy_async(dst, src, size, kind, stream); | ||
stream.synchronize(); | ||
} | ||
|
||
} // namespace cudf::detail |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters