From 496151225aaf90318c089939d3a74e6ccee4e28d Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Wed, 14 Aug 2024 17:32:28 -0500 Subject: [PATCH] Ensure managed memory is supported in cudf.pandas. (#16552) Currently, WSL users of `cudf.pandas` can try to enable UVM (managed memory) but it is not supported by the driver. This PR detects whether UVM is supported before enabling a managed memory pool or prefetching. Closes https://github.com/rapidsai/cudf/issues/16551. --------- Co-authored-by: Vyas Ramasubramani Co-authored-by: Lawrence Mitchell --- dependencies.yaml | 2 +- docs/cudf/source/cudf_pandas/how-it-works.md | 21 ++++++++++++----- python/cudf/cudf/_lib/pylibcudf/utils.pyx | 22 ++++++++++++++++++ python/cudf/cudf/pandas/__init__.py | 24 ++++++++++++++++---- python/cudf_polars/pyproject.toml | 2 +- 5 files changed, 59 insertions(+), 12 deletions(-) diff --git a/dependencies.yaml b/dependencies.yaml index 7ecce362101..4c93ef60dd3 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -631,7 +631,7 @@ dependencies: common: - output_types: [conda, requirements, pyproject] packages: - - polars>=1.0 + - polars>=1.0,<1.3 run_dask_cudf: common: - output_types: [conda, requirements, pyproject] diff --git a/docs/cudf/source/cudf_pandas/how-it-works.md b/docs/cudf/source/cudf_pandas/how-it-works.md index 8efd9d7e063..0bb87f60afe 100644 --- a/docs/cudf/source/cudf_pandas/how-it-works.md +++ b/docs/cudf/source/cudf_pandas/how-it-works.md @@ -44,11 +44,20 @@ allocation may be a bottleneck depending on the workload. Managed memory enables oversubscribing GPU memory. This allows cudf.pandas to process data larger than GPU memory in many cases, without CPU (Pandas) fallback. +```{note} +CUDA Managed Memory on Windows, and more specifically Windows Subsystem for +Linux (WSL2), [does not support oversubscription]( +https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#system-requirements-for-unified-memory), +only unified addressing. 
Furthermore, managed memory on WSL2 has undesirable +performance characteristics. Therefore, `cudf.pandas` uses a non-managed pool +allocator on WSL2, so `cudf.pandas` is limited to the physical size of GPU memory. +``` + Other memory allocators can be used by changing the environment -variable `CUDF_PANDAS_RMM_MODE` to one of the following. +variable `CUDF_PANDAS_RMM_MODE` to one of the following: -1. "managed_pool" (default): CUDA Unified Memory (managed memory) with RMM's asynchronous pool allocator. -2. "managed": CUDA Unified Memory, (managed memory) with no pool allocator. -3. "async": CUDA's built-in pool asynchronous pool allocator with normal CUDA device memory. -4. "pool": RMM's asynchronous pool allocator with normal CUDA device memory. -5. "cuda": normal CUDA device memory with no pool allocator. +1. `"managed_pool"` (default, if supported): CUDA Unified Memory (managed memory) with RMM's asynchronous pool allocator. +2. `"managed"`: CUDA Unified Memory (managed memory) with no pool allocator. +3. `"async"`: CUDA's built-in asynchronous pool allocator with normal CUDA device memory. +4. `"pool"` (default if `"managed_pool"` is not supported): RMM's asynchronous pool allocator with normal CUDA device memory. +5. `"cuda"`: normal CUDA device memory with no pool allocator. 
diff --git a/python/cudf/cudf/_lib/pylibcudf/utils.pyx b/python/cudf/cudf/_lib/pylibcudf/utils.pyx index b4427e8ecff..42e3575ed44 100644 --- a/python/cudf/cudf/_lib/pylibcudf/utils.pyx +++ b/python/cudf/cudf/_lib/pylibcudf/utils.pyx @@ -6,6 +6,8 @@ from libc.stdint cimport uintptr_t from libcpp.functional cimport reference_wrapper from libcpp.vector cimport vector +from cuda import cudart + from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport scalar from cudf._lib.pylibcudf.libcudf.types cimport bitmask_type @@ -34,3 +36,23 @@ cdef vector[reference_wrapper[const scalar]] _as_vector(list source): c_scalars.push_back( reference_wrapper[constscalar](dereference((slr).c_obj))) return c_scalars + + +def _is_concurrent_managed_access_supported(): + """Check the availability of concurrent managed access (UVM). + + Note that WSL2 does not support managed memory. + """ + + # Ensure CUDA is initialized before checking cudaDevAttrConcurrentManagedAccess + cudart.cudaFree(0) + + device_id = 0 + err, supports_managed_access = cudart.cudaDeviceGetAttribute( + cudart.cudaDeviceAttr.cudaDevAttrConcurrentManagedAccess, device_id + ) + if err != cudart.cudaError_t.cudaSuccess: + raise RuntimeError( + f"Failed to check cudaDevAttrConcurrentManagedAccess with error {err}" + ) + return supports_managed_access != 0 diff --git a/python/cudf/cudf/pandas/__init__.py b/python/cudf/cudf/pandas/__init__.py index a6667a7bcd9..e88e795671e 100644 --- a/python/cudf/cudf/pandas/__init__.py +++ b/python/cudf/cudf/pandas/__init__.py @@ -26,8 +26,8 @@ } -def _enable_managed_prefetching(rmm_mode): - if "managed" in rmm_mode: +def _enable_managed_prefetching(rmm_mode, managed_memory_is_supported): + if managed_memory_is_supported and "managed" in rmm_mode: for key in _SUPPORTED_PREFETCHES: pylibcudf.experimental.enable_prefetching(key) @@ -40,7 +40,20 @@ def install(): global LOADED LOADED = loader is not None - rmm_mode = os.getenv("CUDF_PANDAS_RMM_MODE", "managed_pool") + # The default mode is 
"managed_pool" if UVM is supported, otherwise "pool" + managed_memory_is_supported = ( + pylibcudf.utils._is_concurrent_managed_access_supported() + ) + default_rmm_mode = ( + "managed_pool" if managed_memory_is_supported else "pool" + ) + rmm_mode = os.getenv("CUDF_PANDAS_RMM_MODE", default_rmm_mode) + + if "managed" in rmm_mode and not managed_memory_is_supported: + raise ValueError( + f"Managed memory is not supported on this system, so the requested {rmm_mode=} is invalid." + ) + # Check if a non-default memory resource is set current_mr = rmm.mr.get_current_device_resource() if not isinstance(current_mr, rmm.mr.CudaMemoryResource): @@ -53,6 +66,7 @@ def install(): free_memory, _ = rmm.mr.available_device_memory() free_memory = int(round(float(free_memory) * 0.80 / 256) * 256) new_mr = current_mr + if rmm_mode == "pool": new_mr = rmm.mr.PoolMemoryResource( current_mr, @@ -71,8 +85,10 @@ def install(): ) elif rmm_mode != "cuda": raise ValueError(f"Unsupported {rmm_mode=}") + rmm.mr.set_current_device_resource(new_mr) - _enable_managed_prefetching(rmm_mode) + + _enable_managed_prefetching(rmm_mode, managed_memory_is_supported) def pytest_load_initial_conftests(early_config, parser, args): diff --git a/python/cudf_polars/pyproject.toml b/python/cudf_polars/pyproject.toml index def1d086cc1..7b29ad3373d 100644 --- a/python/cudf_polars/pyproject.toml +++ b/python/cudf_polars/pyproject.toml @@ -20,7 +20,7 @@ license = { text = "Apache 2.0" } requires-python = ">=3.9" dependencies = [ "cudf==24.8.*,>=0.0.0a0", - "polars>=1.0", + "polars>=1.0,<1.3", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ "Intended Audience :: Developers",