Merge pull request #16571 from bdice/branch-24.10-merge-24.08
Forward-merge branch-24.08 into branch-24.10
AyodeAwe authored Aug 15, 2024
2 parents ac42bc8 + ed31523 commit 6912246
Showing 3 changed files with 57 additions and 10 deletions.
21 changes: 15 additions & 6 deletions docs/cudf/source/cudf_pandas/how-it-works.md
@@ -44,11 +44,20 @@
allocation may be a bottleneck depending on the workload. Managed memory
enables oversubscribing GPU memory. This allows cudf.pandas to process
data larger than GPU memory in many cases, without CPU (Pandas) fallback.

+```{note}
+CUDA Managed Memory on Windows, including Windows Subsystem for Linux
+(WSL2), [does not support oversubscription](
+https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#system-requirements-for-unified-memory),
+only unified addressing. Furthermore, managed memory on WSL2 has undesirable
+performance characteristics. Therefore, `cudf.pandas` uses a non-managed pool
+allocator on WSL2, so it is limited to the physical size of GPU memory.
+```
+
Other memory allocators can be used by changing the environment
-variable `CUDF_PANDAS_RMM_MODE` to one of the following.
+variable `CUDF_PANDAS_RMM_MODE` to one of the following:

1. "managed_pool" (default): CUDA Unified Memory (managed memory) with RMM's asynchronous pool allocator.
2. "managed": CUDA Unified Memory, (managed memory) with no pool allocator.
3. "async": CUDA's built-in pool asynchronous pool allocator with normal CUDA device memory.
4. "pool": RMM's asynchronous pool allocator with normal CUDA device memory.
5. "cuda": normal CUDA device memory with no pool allocator.
1. `"managed_pool"` (default, if supported): CUDA Unified Memory (managed memory) with RMM's asynchronous pool allocator.
2. `"managed"`: CUDA Unified Memory, (managed memory) with no pool allocator.
3. `"async"`: CUDA's built-in pool asynchronous pool allocator with normal CUDA device memory.
4. `"pool"` (default if `"managed_pool"` is not supported): RMM's asynchronous pool allocator with normal CUDA device memory.
5. `"cuda"`: normal CUDA device memory with no pool allocator.
22 changes: 22 additions & 0 deletions python/cudf/cudf/_lib/pylibcudf/utils.pyx
@@ -6,6 +6,8 @@
from libc.stdint cimport uintptr_t
from libcpp.functional cimport reference_wrapper
from libcpp.vector cimport vector

+from cuda import cudart
+
from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport scalar
from cudf._lib.pylibcudf.libcudf.types cimport bitmask_type

@@ -34,3 +36,23 @@ cdef vector[reference_wrapper[const scalar]] _as_vector(list source):
        c_scalars.push_back(
            reference_wrapper[constscalar](dereference((<Scalar?>slr).c_obj)))
    return c_scalars
+
+
+def _is_concurrent_managed_access_supported():
+    """Check the availability of concurrent managed access (UVM).
+    Note that WSL2 does not support managed memory.
+    """
+
+    # Ensure CUDA is initialized before checking cudaDevAttrConcurrentManagedAccess
+    cudart.cudaFree(0)
+
+    device_id = 0
+    err, supports_managed_access = cudart.cudaDeviceGetAttribute(
+        cudart.cudaDeviceAttr.cudaDevAttrConcurrentManagedAccess, device_id
+    )
+    if err != cudart.cudaError_t.cudaSuccess:
+        raise RuntimeError(
+            f"Failed to check cudaDevAttrConcurrentManagedAccess with error {err}"
+        )
+    return supports_managed_access != 0
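
For context, the same probe can be run from plain Python with the `cuda-python` bindings that the Cython helper above uses. This is an illustrative sketch, not code from the commit; on WSL2 it should return False, which is why `cudf.pandas` falls back to the `"pool"` mode there:

```python
from cuda import cudart

def supports_concurrent_managed_access(device_id: int = 0) -> bool:
    """Return True if the device supports concurrent managed access (UVM)."""
    cudart.cudaFree(0)  # force CUDA context initialization first
    err, value = cudart.cudaDeviceGetAttribute(
        cudart.cudaDeviceAttr.cudaDevAttrConcurrentManagedAccess, device_id
    )
    if err != cudart.cudaError_t.cudaSuccess:
        raise RuntimeError(f"cudaDeviceGetAttribute failed: {err}")
    return value != 0

print("UVM supported:", supports_concurrent_managed_access())
```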
24 changes: 20 additions & 4 deletions python/cudf/cudf/pandas/__init__.py
@@ -26,8 +26,8 @@
}


-def _enable_managed_prefetching(rmm_mode):
-    if "managed" in rmm_mode:
+def _enable_managed_prefetching(rmm_mode, managed_memory_is_supported):
+    if managed_memory_is_supported and "managed" in rmm_mode:
        for key in _SUPPORTED_PREFETCHES:
            pylibcudf.experimental.enable_prefetching(key)

@@ -40,7 +40,20 @@ def install():
    global LOADED
    LOADED = loader is not None

-    rmm_mode = os.getenv("CUDF_PANDAS_RMM_MODE", "managed_pool")
+    # The default mode is "managed_pool" if UVM is supported, otherwise "pool"
+    managed_memory_is_supported = (
+        pylibcudf.utils._is_concurrent_managed_access_supported()
+    )
+    default_rmm_mode = (
+        "managed_pool" if managed_memory_is_supported else "pool"
+    )
+    rmm_mode = os.getenv("CUDF_PANDAS_RMM_MODE", default_rmm_mode)
+
+    if "managed" in rmm_mode and not managed_memory_is_supported:
+        raise ValueError(
+            f"Managed memory is not supported on this system, so the requested {rmm_mode=} is invalid."
+        )
+
    # Check if a non-default memory resource is set
    current_mr = rmm.mr.get_current_device_resource()
    if not isinstance(current_mr, rmm.mr.CudaMemoryResource):
@@ -53,6 +66,7 @@ def install():
    free_memory, _ = rmm.mr.available_device_memory()
    free_memory = int(round(float(free_memory) * 0.80 / 256) * 256)
    new_mr = current_mr
+
    if rmm_mode == "pool":
        new_mr = rmm.mr.PoolMemoryResource(
            current_mr,
@@ -71,8 +85,10 @@
        )
    elif rmm_mode != "cuda":
        raise ValueError(f"Unsupported {rmm_mode=}")
+
    rmm.mr.set_current_device_resource(new_mr)
-    _enable_managed_prefetching(rmm_mode)
+
+    _enable_managed_prefetching(rmm_mode, managed_memory_is_supported)


def pytest_load_initial_conftests(early_config, parser, args):
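
Taken together, `install()` now probes for UVM support, picks `"managed_pool"` or `"pool"` as the default accordingly, and rejects any explicitly requested managed mode on systems without UVM. A standalone sketch of that selection logic plus the pool sizing (the helper names here are hypothetical; only the logic mirrors the diff):

```python
import os

def choose_rmm_mode(managed_memory_is_supported: bool) -> str:
    # Default follows the hardware: managed pool where UVM works, else pool.
    default_rmm_mode = "managed_pool" if managed_memory_is_supported else "pool"
    rmm_mode = os.getenv("CUDF_PANDAS_RMM_MODE", default_rmm_mode)
    if "managed" in rmm_mode and not managed_memory_is_supported:
        raise ValueError(
            f"Managed memory is not supported on this system, "
            f"so the requested {rmm_mode=} is invalid."
        )
    return rmm_mode

def initial_pool_size(free_memory: int) -> int:
    # ~80% of currently free device memory, rounded to a multiple of
    # 256 bytes (RMM's allocation alignment), as in install().
    return int(round(float(free_memory) * 0.80 / 256) * 256)
```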
