From 87549d1c832129596422e3cd88b4a0744fe25d5b Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Fri, 4 Oct 2024 13:39:10 -0700 Subject: [PATCH 1/3] make mmap and stuff configurable --- cpp/src/io/utilities/datasource.cpp | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/cpp/src/io/utilities/datasource.cpp b/cpp/src/io/utilities/datasource.cpp index 0be976b6144..ae570494c44 100644 --- a/cpp/src/io/utilities/datasource.cpp +++ b/cpp/src/io/utilities/datasource.cpp @@ -15,6 +15,7 @@ */ #include "file_io_utilities.hpp" +#include "getenv_or.hpp" #include #include @@ -227,6 +228,19 @@ class memory_mapped_source : public file_source { } private: + [[nodiscard]] bool should_register_mmap_buffer() + { + if (_map_addr == nullptr) { return false; } + + auto const policy = getenv_or("LIBCUDF_MMAP_REGISTER_ENABLED", std::string{"AUTO"}); + + if (policy == "ALWAYS") { return true; } + if (policy == "AUTO") { return pageableMemoryAccessUsesHostPageTables(); } + if (policy == "OFF") { return false; } + + CUDF_FAIL("Invalid LIBCUDF_MMAP_REGISTER_ENABLED value: " + policy); + } + /** * @brief Page-locks (registers) the memory range of the mapped file. 
* @@ -234,7 +248,7 @@ class memory_mapped_source : public file_source { */ void register_mmap_buffer(size_t offset, size_t size) { - if (_map_addr == nullptr or not pageableMemoryAccessUsesHostPageTables()) { return; } + if (not should_register_mmap_buffer()) { return; } // Registered region must be within the mapped region _reg_offset = std::max(offset, _map_offset); @@ -267,8 +281,20 @@ class memory_mapped_source : public file_source { } } + [[nodiscard]] bool should_memory_map() + { + auto const policy = getenv_or("LIBCUDF_MMAP_ENABLED", std::string{"ON"}); + + if (policy == "ON") { return true; } + if (policy == "OFF") { return false; } + + CUDF_FAIL("Invalid LIBCUDF_MMAP_ENABLED value: " + policy); + } + void map(int fd, size_t offset, size_t size) { + if (not should_memory_map()) { return; } + CUDF_EXPECTS(offset < _file.size(), "Offset is past end of file", std::overflow_error); // Offset for `mmap()` must be page aligned From 1fa302ecf17fd2873052aa6c39c6ed224332d423 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Fri, 4 Oct 2024 14:30:04 -0700 Subject: [PATCH 2/3] remove cufile_integration::is_always_enabled() workaround --- cpp/src/io/utilities/datasource.cpp | 33 ++++++++++++----------------- 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/cpp/src/io/utilities/datasource.cpp b/cpp/src/io/utilities/datasource.cpp index ae570494c44..67f14277989 100644 --- a/cpp/src/io/utilities/datasource.cpp +++ b/cpp/src/io/utilities/datasource.cpp @@ -281,20 +281,8 @@ class memory_mapped_source : public file_source { } } - [[nodiscard]] bool should_memory_map() - { - auto const policy = getenv_or("LIBCUDF_MMAP_ENABLED", std::string{"ON"}); - - if (policy == "ON") { return true; } - if (policy == "OFF") { return false; } - - CUDF_FAIL("Invalid LIBCUDF_MMAP_ENABLED value: " + policy); - } - void map(int fd, size_t offset, size_t size) { - if (not should_memory_map()) { return; } - CUDF_EXPECTS(offset < _file.size(), "Offset is past end of file", 
std::overflow_error); // Offset for `mmap()` must be page aligned @@ -493,15 +481,22 @@ std::unique_ptr datasource::create(std::string const& filepath, CUDF_EXPECTS(max_size_estimate == 0 or min_size_estimate <= max_size_estimate, "Invalid min/max size estimates for datasource creation"); -#ifdef CUFILE_FOUND - if (cufile_integration::is_always_enabled()) { - // avoid mmap as GDS is expected to be used for most reads + auto const use_memory_mapping = [] { + auto const policy = getenv_or("LIBCUDF_MMAP_ENABLED", std::string{"ON"}); + + if (policy == "ON") { return true; } + if (policy == "OFF") { return false; } + + CUDF_FAIL("Invalid LIBCUDF_MMAP_ENABLED value: " + policy); + }(); + + if (use_memory_mapping) { + return std::make_unique( + filepath.c_str(), offset, max_size_estimate, min_size_estimate); + } else { + // `file_source` reads the file directly without memory mapping return std::make_unique(filepath.c_str()); } -#endif - // Use our own memory mapping implementation for direct file reads - return std::make_unique( - filepath.c_str(), offset, max_size_estimate, min_size_estimate); } std::unique_ptr datasource::create(host_buffer const& buffer) From 3ea1e35beeea96f81047879fa37bf683b91ca983 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Tue, 8 Oct 2024 14:17:26 -0700 Subject: [PATCH 3/3] ON --- cpp/src/io/utilities/datasource.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/io/utilities/datasource.cpp b/cpp/src/io/utilities/datasource.cpp index 67f14277989..2f47f1a35e0 100644 --- a/cpp/src/io/utilities/datasource.cpp +++ b/cpp/src/io/utilities/datasource.cpp @@ -234,7 +234,7 @@ class memory_mapped_source : public file_source { auto const policy = getenv_or("LIBCUDF_MMAP_REGISTER_ENABLED", std::string{"AUTO"}); - if (policy == "ALWAYS") { return true; } + if (policy == "ON") { return true; } if (policy == "AUTO") { return pageableMemoryAccessUsesHostPageTables(); } if (policy == "OFF") { return false; }