diff --git a/cpp/src/column/column_view.cpp b/cpp/src/column/column_view.cpp index b0f9e9f0e74..386c5ebe478 100644 --- a/cpp/src/column/column_view.cpp +++ b/cpp/src/column/column_view.cpp @@ -45,7 +45,10 @@ void prefetch_col_data(ColumnView& col, void const* data_ptr, std::string_view k key, data_ptr, col.size() * size_of(col.type()), cudf::get_default_stream()); } else if (col.type().id() == type_id::STRING) { strings_column_view scv{col}; - + if (data_ptr == nullptr) { + // Do not call chars_size if the data_ptr is nullptr. + return; + } cudf::experimental::prefetch::detail::prefetch_noexcept( key, data_ptr, diff --git a/cpp/src/utilities/prefetch.cpp b/cpp/src/utilities/prefetch.cpp index 86d6cc00764..58971552758 100644 --- a/cpp/src/utilities/prefetch.cpp +++ b/cpp/src/utilities/prefetch.cpp @@ -51,6 +51,20 @@ cudaError_t prefetch_noexcept(std::string_view key, rmm::cuda_stream_view stream, rmm::cuda_device_id device_id) noexcept { + // Don't try to prefetch nullptrs or empty data. Sometimes libcudf has column + // views that use nullptrs with a nonzero size as an optimization. + if (ptr == nullptr) { + if (prefetch_config::instance().debug) { + std::cerr << "Skipping prefetch of nullptr" << std::endl; + } + return cudaSuccess; + } + if (size == 0) { + if (prefetch_config::instance().debug) { + std::cerr << "Skipping prefetch of size 0" << std::endl; + } + return cudaSuccess; + } if (prefetch_config::instance().get(key)) { if (prefetch_config::instance().debug) { std::cerr << "Prefetching " << size << " bytes for key " << key << " at location " << ptr diff --git a/python/cudf/cudf/pandas/__init__.py b/python/cudf/cudf/pandas/__init__.py index bf88c950385..a6667a7bcd9 100644 --- a/python/cudf/cudf/pandas/__init__.py +++ b/python/cudf/cudf/pandas/__init__.py @@ -7,6 +7,8 @@ import rmm.mr +from cudf._lib import pylibcudf + from .fast_slow_proxy import is_proxy_object from .magics import load_ipython_extension from .profiler import Profiler @@ -16,6 +18,19 @@ LOADED = False +_SUPPORTED_PREFETCHES = { + "column_view::get_data", + "mutable_column_view::get_data", + "gather", + "hash_join", +} + + +def _enable_managed_prefetching(rmm_mode): + if "managed" in rmm_mode: + for key in _SUPPORTED_PREFETCHES: + pylibcudf.experimental.enable_prefetching(key) + def install(): """Enable Pandas Accelerator Mode.""" @@ -33,7 +48,7 @@ def install(): f"cudf.pandas detected an already configured memory resource, ignoring 'CUDF_PANDAS_RMM_MODE'={str(rmm_mode)}", UserWarning, ) - return rmm_mode + return free_memory, _ = rmm.mr.available_device_memory() free_memory = int(round(float(free_memory) * 0.80 / 256) * 256) @@ -57,7 +72,7 @@ def install(): elif rmm_mode != "cuda": raise ValueError(f"Unsupported {rmm_mode=}") rmm.mr.set_current_device_resource(new_mr) - return rmm_mode + _enable_managed_prefetching(rmm_mode) def pytest_load_initial_conftests(early_config, parser, args): diff --git a/python/cudf/cudf/pandas/__main__.py b/python/cudf/cudf/pandas/__main__.py index 591744ce793..3a82829eb7a 100644 --- a/python/cudf/cudf/pandas/__main__.py +++ b/python/cudf/cudf/pandas/__main__.py @@ -72,17 +72,7 @@ def main(): args = parser.parse_args() - rmm_mode = install() - if "managed" in rmm_mode: - for key in { - "column_view::get_data", - "mutable_column_view::get_data", - "gather", - "hash_join", - }: - from cudf._lib import pylibcudf - - pylibcudf.experimental.enable_prefetching(key) + install() with profile(args.profile, args.line_profile, args.args[0]) as fn: args.args[0] = fn if args.module: