diff --git a/cpp/src/io/utilities/datasource.cpp b/cpp/src/io/utilities/datasource.cpp index ca8932322bf..d0afa8e3d36 100644 --- a/cpp/src/io/utilities/datasource.cpp +++ b/cpp/src/io/utilities/datasource.cpp @@ -30,6 +30,7 @@ #include #include +#include #include namespace cudf { @@ -364,6 +365,57 @@ class host_buffer_source final : public datasource { cudf::host_span _h_buffer; ///< A non-owning view of the existing host data }; +// sparse host-buffer datasource +class sparse_host_buffer_source final : public datasource { + public: + explicit sparse_host_buffer_source( + std::map> h_buffer_map, size_t full_size) + : _h_buffer_map{h_buffer_map}, _full_size{full_size} + { + } + + const size_t find_chunk_offset(const size_t offset) + { + // Iterate over h_buffer_map to find the appropriate chunk + for (auto const& [chunk_offset, chunk] : _h_buffer_map) { + if (offset < chunk_offset) { + break; + } else if (offset < chunk_offset + chunk.size()) { + return pair; + } + } + return _full_size; // No appropriate offset found + } + + size_t host_read(size_t offset, size_t size, uint8_t* dst) override + { + auto const chunk_offset = this->find_chunk_offset(offset); + CUDF_EXPECTS(chunk_offset != _full_size, "Invalid sparse host-buffer offset"); + auto const& chunk = _h_buffer_map[chunk_offset]; + auto const count = std::min(size, chunk.size() - (offset - chunk_offset)); + std::memcpy(dst, chunk.data() + offset - chunk_offset, count); + return count; + } + + std::unique_ptr host_read(size_t offset, size_t size) override + { + auto const chunk_offset = this->find_chunk_offset(offset); + CUDF_EXPECTS(chunk_offset != _full_size, "Invalid sparse host-buffer offset"); + auto const& chunk = _h_buffer_map[chunk_offset]; + auto const count = std::min(size, chunk.size() - (offset - chunk_offset)); + return std::make_unique( + reinterpret_cast(chunk.data() + offset - chunk_offset), count); + } + + [[nodiscard]] bool supports_device_read() const override { return false; } + + [[nodiscard]] size_t size() const override { return _full_size; } + + private: + size_t _full_size; + std::map> _h_buffer_map; +}; + /** * @brief Wrapper class for user implemented data sources * @@ -453,6 +505,12 @@ std::unique_ptr datasource::create(cudf::host_span return std::make_unique(buffer); } +std::unique_ptr datasource::create( + std::map> buffer_map) +{ + return std::make_unique(buffer_map); +} + std::unique_ptr datasource::create(cudf::device_span buffer) { return std::make_unique(buffer);