From b8917229f8a2446c7e5f697475f76743a05e6856 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Thu, 17 Oct 2024 19:02:30 -0700 Subject: [PATCH] Control whether a file data source memory-maps the file with an environment variable (#17004) Adds an environment variable, `LIBCUDF_MMAP_ENABLED`, to control whether we memory map the input file in the data source. Authors: - Vukasin Milovanovic (https://github.com/vuule) Approvers: - Nghia Truong (https://github.com/ttnghia) - Tianyu Liu (https://github.com/kingcrimsontianyu) URL: https://github.com/rapidsai/cudf/pull/17004 --- cpp/src/io/utilities/datasource.cpp | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/cpp/src/io/utilities/datasource.cpp b/cpp/src/io/utilities/datasource.cpp index 28f7f08521e..2daaecadca6 100644 --- a/cpp/src/io/utilities/datasource.cpp +++ b/cpp/src/io/utilities/datasource.cpp @@ -15,6 +15,7 @@ */ #include "file_io_utilities.hpp" +#include "getenv_or.hpp" #include #include @@ -392,14 +393,21 @@ std::unique_ptr datasource::create(std::string const& filepath, size_t offset, size_t max_size_estimate) { -#ifdef CUFILE_FOUND - if (cufile_integration::is_always_enabled()) { - // avoid mmap as GDS is expected to be used for most reads + auto const use_memory_mapping = [] { + auto const policy = getenv_or("LIBCUDF_MMAP_ENABLED", std::string{"ON"}); + + if (policy == "ON") { return true; } + if (policy == "OFF") { return false; } + + CUDF_FAIL("Invalid LIBCUDF_MMAP_ENABLED value: " + policy); + }(); + + if (use_memory_mapping) { + return std::make_unique(filepath.c_str(), offset, max_size_estimate); + } else { + // `file_source` reads the file directly, without memory mapping return std::make_unique(filepath.c_str()); } -#endif - // Use our own memory mapping implementation for direct file reads - return std::make_unique(filepath.c_str(), offset, max_size_estimate); } std::unique_ptr datasource::create(host_buffer const& buffer)