Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP][RFC] Add sparse host buffer source #16252

Draft
wants to merge 2 commits into
base: branch-24.08
Choose a base branch
from
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 58 additions & 0 deletions cpp/src/io/utilities/datasource.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include <sys/mman.h>
#include <unistd.h>

#include <algorithm>
#include <map>
#include <unordered_map>
#include <utility>

namespace cudf {
Expand Down Expand Up @@ -364,6 +365,57 @@ class host_buffer_source final : public datasource {
cudf::host_span<std::byte const> _h_buffer; ///< A non-owning view of the existing host data
};

// sparse host-buffer datasource
class sparse_host_buffer_source final : public datasource {
public:
explicit sparse_host_buffer_source(
std::map<size_t, cudf::host_span<std::byte const>> h_buffer_map, size_t full_size)
: _h_buffer_map{h_buffer_map}, _full_size{full_size}
{
}

const size_t find_chunk_offset(const size_t offset)
{
// Iterate over h_buffer_map to find the appropriate chunk
for (auto const& [chunk_offset, chunk] : _h_buffer_map) {
if (offset < chunk_offset) {
break;
} else if (offset < chunk_offset + chunk.size()) {
return pair;
}
}
return _full_size; // No appropriate offset found
}

size_t host_read(size_t offset, size_t size, uint8_t* dst) override
{
auto const chunk_offset = this->find_chunk_offset(offset);
CUDF_EXPECTS(chunk_offset != _full_size, "Invalid sparse host-buffer offset");
auto const& chunk = _h_buffer_map[chunk_offset];
auto const count = std::min(size, chunk.size() - (offset - chunk_offset));
std::memcpy(dst, chunk.data() + offset - chunk_offset, count);
return count;
}

std::unique_ptr<buffer> host_read(size_t offset, size_t size) override
{
auto const chunk_offset = this->find_chunk_offset(offset);
CUDF_EXPECTS(chunk_offset != _full_size, "Invalid sparse host-buffer offset");
auto const& chunk = _h_buffer_map[chunk_offset];
auto const count = std::min(size, chunk.size() - (offset - chunk_offset));
return std::make_unique<non_owning_buffer>(
reinterpret_cast<uint8_t const*>(chunk.data() + offset - chunk_offset), count);
}

[[nodiscard]] bool supports_device_read() const override { return false; }

[[nodiscard]] size_t size() const override { return _full_size; }

private:
size_t _full_size;
std::map<size_t, cudf::host_span<std::byte const>> _h_buffer_map;
};

/**
* @brief Wrapper class for user implemented data sources
*
Expand Down Expand Up @@ -453,6 +505,12 @@ std::unique_ptr<datasource> datasource::create(cudf::host_span<std::byte const>
return std::make_unique<host_buffer_source>(buffer);
}

/**
 * @brief Creates a datasource from a sparse set of host buffers.
 *
 * @param buffer_map Map from chunk start offset to a non-owning view of the chunk's bytes
 * @return Datasource that reads from the given chunks
 */
std::unique_ptr<datasource> datasource::create(
  std::map<size_t, cudf::host_span<std::byte const>> buffer_map)
{
  // sparse_host_buffer_source requires the full size of the source, which this overload
  // does not receive. Use the furthest end of any chunk.
  // NOTE(review): confirm this is the intended size when the source has trailing bytes
  // that no chunk covers.
  size_t full_size = 0;
  for (auto const& [chunk_offset, chunk] : buffer_map) {
    full_size = std::max(full_size, chunk_offset + chunk.size());
  }
  return std::make_unique<sparse_host_buffer_source>(std::move(buffer_map), full_size);
}

std::unique_ptr<datasource> datasource::create(cudf::device_span<std::byte const> buffer)
{
return std::make_unique<device_buffer_source>(buffer);
Expand Down
Loading