Ensure managed memory is supported in cudf.pandas. #16552

Merged

Changes from 6 commits
18 changes: 12 additions & 6 deletions docs/cudf/source/cudf_pandas/how-it-works.md
@@ -44,11 +44,17 @@ allocation may be a bottleneck depending on the workload. Managed memory
enables oversubscribing GPU memory. This allows cudf.pandas to process
data larger than GPU memory in many cases, without CPU (Pandas) fallback.

+```{note}
+CUDA Unified Memory is not supported on Windows Subsystem for Linux (WSL2), so
+`cudf.pandas` uses a non-managed pool allocator. In WSL2, `cudf.pandas` is
+limited to the memory size of the GPU it is running on.
+```
+
Other memory allocators can be used by changing the environment
-variable `CUDF_PANDAS_RMM_MODE` to one of the following.
+variable `CUDF_PANDAS_RMM_MODE` to one of the following:

-1. "managed_pool" (default): CUDA Unified Memory (managed memory) with RMM's asynchronous pool allocator.
-2. "managed": CUDA Unified Memory, (managed memory) with no pool allocator.
-3. "async": CUDA's built-in pool asynchronous pool allocator with normal CUDA device memory.
-4. "pool": RMM's asynchronous pool allocator with normal CUDA device memory.
-5. "cuda": normal CUDA device memory with no pool allocator.
+1. `"managed_pool"` (default, if supported): CUDA Unified Memory (managed memory) with RMM's asynchronous pool allocator.
+2. `"managed"`: CUDA Unified Memory (managed memory) with no pool allocator.
+3. `"async"`: CUDA's built-in asynchronous pool allocator with normal CUDA device memory.
+4. `"pool"` (default if `"managed_pool"` is not supported): RMM's asynchronous pool allocator with normal CUDA device memory.
+5. `"cuda"`: normal CUDA device memory with no pool allocator.
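
For illustration, here is a minimal sketch of overriding the default mode. It assumes the standard `cudf.pandas` flow, where the environment variable must be set before `install()` builds the RMM memory resource:

```python
import os

# Request plain managed memory instead of the default pool-backed mode.
# Note: per this PR, requesting a managed mode on a system without UVM
# support (e.g. WSL2) raises a ValueError at install time.
os.environ["CUDF_PANDAS_RMM_MODE"] = "managed"

import cudf.pandas

cudf.pandas.install()

import pandas as pd  # pandas API, now accelerated by cuDF where possible
```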
22 changes: 22 additions & 0 deletions python/cudf/cudf/_lib/pylibcudf/utils.pyx
@@ -6,6 +6,8 @@ from libc.stdint cimport uintptr_t
from libcpp.functional cimport reference_wrapper
from libcpp.vector cimport vector

+from cuda import cudart

from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport scalar
from cudf._lib.pylibcudf.libcudf.types cimport bitmask_type

@@ -34,3 +36,23 @@ cdef vector[reference_wrapper[const scalar]] _as_vector(list source):
        c_scalars.push_back(
            reference_wrapper[constscalar](dereference((<Scalar?>slr).c_obj)))
    return c_scalars


+def _is_concurrent_managed_access_supported():
+    """Check the availability of concurrent managed access (UVM).
+
+    Note that WSL2 does not support managed memory.
+    """
+
+    # Ensure CUDA is initialized before checking cudaDevAttrConcurrentManagedAccess
+    cudart.cudaFree(0)
+
+    device_id = 0
+    err, supports_managed_access = cudart.cudaDeviceGetAttribute(
+        cudart.cudaDeviceAttr.cudaDevAttrConcurrentManagedAccess, device_id
+    )
+    if err != cudart.cudaError_t.cudaSuccess:
+        raise RuntimeError(
+            f"Failed to check cudaDevAttrConcurrentManagedAccess with error {err}"
+        )
+    return supports_managed_access != 0
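
As a hedged usage sketch (this helper is a private cudf function at the time of this PR, not a public API), the check can drive the same mode fallback that `install()` applies in the next file:

```python
# Illustrative only: pick an RMM mode based on UVM availability.
from cudf._lib.pylibcudf.utils import (
    _is_concurrent_managed_access_supported,
)

if _is_concurrent_managed_access_supported():
    rmm_mode = "managed_pool"  # UVM available: GPU memory can be oversubscribed
else:
    rmm_mode = "pool"  # e.g. WSL2: stay within physical device memory
print(f"Selected RMM mode: {rmm_mode}")
```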
24 changes: 20 additions & 4 deletions python/cudf/cudf/pandas/__init__.py
@@ -26,8 +26,8 @@
}


-def _enable_managed_prefetching(rmm_mode):
-    if "managed" in rmm_mode:
+def _enable_managed_prefetching(rmm_mode, managed_memory_is_supported):
+    if managed_memory_is_supported and "managed" in rmm_mode:
        for key in _SUPPORTED_PREFETCHES:
            pylibcudf.experimental.enable_prefetching(key)

@@ -40,7 +40,20 @@ def install():
    global LOADED
    LOADED = loader is not None

-    rmm_mode = os.getenv("CUDF_PANDAS_RMM_MODE", "managed_pool")
+    # The default mode is "managed_pool" if UVM is supported, otherwise "pool"
+    managed_memory_is_supported = (
+        pylibcudf.utils._is_concurrent_managed_access_supported()
+    )
+    default_rmm_mode = (
+        "managed_pool" if managed_memory_is_supported else "pool"
+    )
+    rmm_mode = os.getenv("CUDF_PANDAS_RMM_MODE", default_rmm_mode)
+
+    if "managed" in rmm_mode and not managed_memory_is_supported:
+        raise ValueError(
+            f"Managed memory is not supported on this system, so the requested {rmm_mode=} is invalid."
+        )

    # Check if a non-default memory resource is set
    current_mr = rmm.mr.get_current_device_resource()
    if not isinstance(current_mr, rmm.mr.CudaMemoryResource):
@@ -53,6 +66,7 @@ def install():
    free_memory, _ = rmm.mr.available_device_memory()
    free_memory = int(round(float(free_memory) * 0.80 / 256) * 256)
    new_mr = current_mr
+
    if rmm_mode == "pool":
        new_mr = rmm.mr.PoolMemoryResource(
            current_mr,
@@ -71,8 +85,10 @@
        )
    elif rmm_mode != "cuda":
        raise ValueError(f"Unsupported {rmm_mode=}")
+
    rmm.mr.set_current_device_resource(new_mr)
-    _enable_managed_prefetching(rmm_mode)
+
+    _enable_managed_prefetching(rmm_mode, managed_memory_is_supported)


def pytest_load_initial_conftests(early_config, parser, args):
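
To make the new control flow concrete, here is a short walk-through sketch of how `install()` now resolves the mode and sizes the pool. The values are assumptions for the example, and the rounding mirrors the unchanged pool-sizing lines above (256 bytes matches RMM's allocation alignment):

```python
import os

# Assume a system without concurrent managed access, e.g. WSL2.
managed_supported = False

default_mode = "managed_pool" if managed_supported else "pool"
rmm_mode = os.getenv("CUDF_PANDAS_RMM_MODE", default_mode)

# Per this PR, explicitly requesting a managed mode here would raise:
#   ValueError: Managed memory is not supported on this system, ...

# Pool sizing from install(): ~80% of free memory, rounded to a
# multiple of 256 bytes.
free_memory = 8_000_000_123  # pretend ~8 GB are currently free
pool_size = int(round(float(free_memory) * 0.80 / 256) * 256)
print(pool_size)  # 6400000000 -> a multiple of 256
```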