-
-
Notifications
You must be signed in to change notification settings - Fork 65
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Test of dynamic dispatch for block codec
- Loading branch information
Showing
4 changed files
with
774 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,307 @@ | ||
#pragma once | ||
|
||
#include "bit_vector.hpp" | ||
#include "codec/block_codec.hpp" | ||
#include "codec/block_codecs.hpp" | ||
#include "codec/compact_elias_fano.hpp" | ||
#include "concepts.hpp" | ||
#include "concepts/inverted_index.hpp" | ||
#include "global_parameters.hpp" | ||
#include "mappable/mappable_vector.hpp" | ||
#include "mappable/mapper.hpp" | ||
#include "memory_source.hpp" | ||
|
||
namespace pisa { | ||
|
||
class BlockInvertedIndexCursor { | ||
public: | ||
BlockInvertedIndexCursor(BlockCodec const* block_codec, std::uint8_t const* data, std::uint64_t universe) | ||
: m_base(TightVariableByte::decode(data, &m_n, 1)), | ||
m_blocks(ceil_div(m_n, block_codec->block_size())), | ||
m_block_maxs(m_base), | ||
m_block_endpoints(m_block_maxs + 4 * m_blocks), | ||
m_blocks_data(m_block_endpoints + 4 * (m_blocks - 1)), | ||
m_universe(universe), | ||
m_block_codec(block_codec), | ||
m_block_size(block_codec->block_size()) { | ||
PISA_ASSERT_CONCEPT( | ||
(concepts::FrequencyPostingCursor<BlockInvertedIndexCursor> | ||
&& concepts::SortedPostingCursor<BlockInvertedIndexCursor>) | ||
); | ||
|
||
m_docs_buf.resize(m_block_size); | ||
m_freqs_buf.resize(m_block_size); | ||
reset(); | ||
} | ||
|
||
void reset() { decode_docs_block(0); } | ||
|
||
void PISA_ALWAYSINLINE next() { | ||
++m_pos_in_block; | ||
if PISA_UNLIKELY (m_pos_in_block == m_cur_block_size) { | ||
if (m_cur_block + 1 == m_blocks) { | ||
m_cur_docid = m_universe; | ||
return; | ||
} | ||
decode_docs_block(m_cur_block + 1); | ||
} else { | ||
m_cur_docid += m_docs_buf[m_pos_in_block] + 1; | ||
} | ||
} | ||
|
||
/** | ||
* Moves to the next document, counting from the current position, | ||
* with the ID equal to or greater than `lower_bound`. | ||
* | ||
* In particular, if called with a value that is less than or equal | ||
* to the current document ID, the position will not change. | ||
*/ | ||
void PISA_ALWAYSINLINE next_geq(uint64_t lower_bound) { | ||
if PISA_UNLIKELY (lower_bound > m_cur_block_max) { | ||
// binary search seems to perform worse here | ||
if (lower_bound > block_max(m_blocks - 1)) { | ||
m_cur_docid = m_universe; | ||
return; | ||
} | ||
|
||
uint64_t block = m_cur_block + 1; | ||
while (block_max(block) < lower_bound) { | ||
++block; | ||
} | ||
|
||
decode_docs_block(block); | ||
} | ||
|
||
while (docid() < lower_bound) { | ||
m_cur_docid += m_docs_buf[++m_pos_in_block] + 1; | ||
assert(m_pos_in_block < m_cur_block_size); | ||
} | ||
} | ||
|
||
void PISA_ALWAYSINLINE move(uint64_t pos) { | ||
assert(pos >= position()); | ||
uint64_t block = pos / m_block_size; | ||
if PISA_UNLIKELY (block != m_cur_block) { | ||
decode_docs_block(block); | ||
} | ||
while (position() < pos) { | ||
m_cur_docid += m_docs_buf[++m_pos_in_block] + 1; | ||
} | ||
} | ||
|
||
uint64_t docid() const { return m_cur_docid; } | ||
|
||
uint64_t PISA_ALWAYSINLINE freq() { | ||
if (!m_freqs_decoded) { | ||
decode_freqs_block(); | ||
} | ||
return m_freqs_buf[m_pos_in_block] + 1; | ||
} | ||
|
||
uint64_t PISA_ALWAYSINLINE value() { return freq(); } | ||
|
||
uint64_t position() const { return m_cur_block * m_block_size + m_pos_in_block; } | ||
|
||
uint64_t size() const noexcept { return m_n; } | ||
|
||
uint64_t num_blocks() const { return m_blocks; } | ||
|
||
uint64_t stats_freqs_size() const { | ||
// XXX rewrite in terms of get_blocks() | ||
uint64_t bytes = 0; | ||
uint8_t const* ptr = m_blocks_data; | ||
static const uint64_t block_size = m_block_size; | ||
std::vector<uint32_t> buf(block_size); | ||
for (size_t b = 0; b < m_blocks; ++b) { | ||
uint32_t cur_block_size = | ||
((b + 1) * block_size <= size()) ? block_size : (size() % block_size); | ||
|
||
uint32_t cur_base = (b != 0U ? block_max(b - 1) : uint32_t(-1)) + 1; | ||
uint8_t const* freq_ptr = m_block_codec->decode( | ||
ptr, buf.data(), block_max(b) - cur_base - (cur_block_size - 1), cur_block_size | ||
); | ||
ptr = m_block_codec->decode(freq_ptr, buf.data(), uint32_t(-1), cur_block_size); | ||
bytes += ptr - freq_ptr; | ||
} | ||
|
||
return bytes; | ||
} | ||
|
||
struct block_data { | ||
uint32_t index; | ||
uint32_t max; | ||
uint32_t size; | ||
uint32_t doc_gaps_universe; | ||
uint8_t const* docs_begin; | ||
uint8_t const* freqs_begin; | ||
uint8_t const* end; | ||
BlockCodec const* block_codec; | ||
|
||
void append_docs_block(std::vector<uint8_t>& out) const { | ||
out.insert(out.end(), docs_begin, freqs_begin); | ||
} | ||
|
||
void append_freqs_block(std::vector<uint8_t>& out) const { | ||
out.insert(out.end(), freqs_begin, end); | ||
} | ||
|
||
void decode_doc_gaps(std::vector<uint32_t>& out) const { | ||
out.resize(size); | ||
block_codec->decode(docs_begin, out.data(), doc_gaps_universe, size); | ||
} | ||
|
||
void decode_freqs(std::vector<uint32_t>& out) const { | ||
out.resize(size); | ||
block_codec->decode(freqs_begin, out.data(), uint32_t(-1), size); | ||
} | ||
}; | ||
|
||
std::vector<block_data> get_blocks() { | ||
std::vector<block_data> blocks; | ||
|
||
uint8_t const* ptr = m_blocks_data; | ||
static const uint64_t block_size = m_block_size; | ||
std::vector<uint32_t> buf(block_size); | ||
for (size_t b = 0; b < m_blocks; ++b) { | ||
blocks.emplace_back(); | ||
uint32_t cur_block_size = | ||
((b + 1) * block_size <= size()) ? block_size : (size() % block_size); | ||
|
||
uint32_t cur_base = (b != 0U ? block_max(b - 1) : uint32_t(-1)) + 1; | ||
uint32_t gaps_universe = block_max(b) - cur_base - (cur_block_size - 1); | ||
|
||
blocks.back().index = b; | ||
blocks.back().size = cur_block_size; | ||
blocks.back().docs_begin = ptr; | ||
blocks.back().doc_gaps_universe = gaps_universe; | ||
blocks.back().max = block_max(b); | ||
blocks.back().block_codec = m_block_codec; | ||
|
||
uint8_t const* freq_ptr = | ||
m_block_codec->decode(ptr, buf.data(), gaps_universe, cur_block_size); | ||
blocks.back().freqs_begin = freq_ptr; | ||
ptr = m_block_codec->decode(freq_ptr, buf.data(), uint32_t(-1), cur_block_size); | ||
blocks.back().end = ptr; | ||
} | ||
|
||
assert(blocks.size() == num_blocks()); | ||
return blocks; | ||
} | ||
|
||
private: | ||
uint32_t block_max(uint32_t block) const { return ((uint32_t const*)m_block_maxs)[block]; } | ||
|
||
void PISA_NOINLINE decode_docs_block(uint64_t block) { | ||
static const uint64_t block_size = m_block_size; | ||
uint32_t endpoint = block != 0U ? ((uint32_t const*)m_block_endpoints)[block - 1] : 0; | ||
uint8_t const* block_data = m_blocks_data + endpoint; | ||
m_cur_block_size = ((block + 1) * block_size <= size()) ? block_size : (size() % block_size); | ||
uint32_t cur_base = (block != 0U ? block_max(block - 1) : uint32_t(-1)) + 1; | ||
m_cur_block_max = block_max(block); | ||
m_freqs_block_data = m_block_codec->decode( | ||
block_data, m_docs_buf.data(), m_cur_block_max - cur_base - (m_cur_block_size - 1), m_cur_block_size | ||
); | ||
intrinsics::prefetch(m_freqs_block_data); | ||
|
||
m_docs_buf[0] += cur_base; | ||
|
||
m_cur_block = block; | ||
m_pos_in_block = 0; | ||
m_cur_docid = m_docs_buf[0]; | ||
m_freqs_decoded = false; | ||
} | ||
|
||
void PISA_NOINLINE decode_freqs_block() { | ||
uint8_t const* next_block = m_block_codec->decode( | ||
m_freqs_block_data, m_freqs_buf.data(), uint32_t(-1), m_cur_block_size | ||
); | ||
intrinsics::prefetch(next_block); | ||
m_freqs_decoded = true; | ||
} | ||
|
||
uint32_t m_n{0}; | ||
uint8_t const* m_base; | ||
uint32_t m_blocks; | ||
uint8_t const* m_block_maxs; | ||
uint8_t const* m_block_endpoints; | ||
uint8_t const* m_blocks_data; | ||
uint64_t m_universe; | ||
|
||
uint32_t m_cur_block{0}; | ||
uint32_t m_pos_in_block{0}; | ||
uint32_t m_cur_block_max{0}; | ||
uint32_t m_cur_block_size{0}; | ||
uint32_t m_cur_docid{0}; | ||
|
||
uint8_t const* m_freqs_block_data{nullptr}; | ||
bool m_freqs_decoded{false}; | ||
|
||
std::vector<uint32_t> m_docs_buf; | ||
std::vector<uint32_t> m_freqs_buf; | ||
BlockCodec const* m_block_codec; | ||
std::size_t m_block_size; | ||
}; | ||
|
||
class BlockInvertedIndex { | ||
private: | ||
global_parameters m_params; | ||
std::size_t m_size{0}; | ||
std::size_t m_num_docs{0}; | ||
bit_vector m_endpoints; | ||
mapper::mappable_vector<std::uint8_t> m_lists; | ||
MemorySource m_source; | ||
std::unique_ptr<BlockCodec> m_block_codec; | ||
|
||
public: | ||
using document_enumerator = BlockInvertedIndexCursor; | ||
|
||
explicit BlockInvertedIndex(MemorySource source, std::unique_ptr<BlockCodec> block_codec) | ||
: m_source(std::move(source)), m_block_codec(std::move(block_codec)) { | ||
PISA_ASSERT_CONCEPT( | ||
(concepts::SortedInvertedIndex<BlockInvertedIndex, BlockInvertedIndexCursor>) | ||
); | ||
mapper::map(*this, m_source.data(), mapper::map_flags::warmup); | ||
} | ||
|
||
template <typename Visitor> | ||
void map(Visitor& visit) { | ||
visit(m_params, "m_params")(m_size, "m_size")(m_num_docs, "m_num_docs")( | ||
m_endpoints, "m_endpoints")(m_lists, "m_lists"); | ||
} | ||
|
||
[[nodiscard]] auto operator[](std::size_t term_id) const -> BlockInvertedIndexCursor { | ||
// check_term_range(term_id); | ||
compact_elias_fano::enumerator endpoints(m_endpoints, 0, m_lists.size(), m_size, m_params); | ||
auto endpoint = endpoints.move(term_id).second; | ||
return BlockInvertedIndexCursor(m_block_codec.get(), m_lists.data() + endpoint, num_docs()); | ||
} | ||
|
||
/** | ||
* \returns The size of the index, i.e., the number of terms (posting lists). | ||
*/ | ||
[[nodiscard]] std::size_t size() const noexcept { return m_size; } | ||
|
||
/** | ||
* \returns The number of distinct documents in the index. | ||
*/ | ||
[[nodiscard]] std::uint64_t num_docs() const noexcept { return m_num_docs; } | ||
|
||
void warmup(std::size_t term_id) const { | ||
// check_term_range(term_id); | ||
compact_elias_fano::enumerator endpoints(m_endpoints, 0, m_lists.size(), m_size, m_params); | ||
|
||
auto begin = endpoints.move(term_id).second; | ||
auto end = m_lists.size(); | ||
if (term_id + 1 != size()) { | ||
end = endpoints.move(term_id + 1).second; | ||
} | ||
|
||
volatile std::uint32_t tmp; | ||
for (std::size_t i = begin; i != end; ++i) { | ||
tmp = m_lists[i]; | ||
} | ||
(void)tmp; | ||
} | ||
}; | ||
|
||
}; // namespace pisa |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
#pragma once | ||
|
||
#include <cstdint> | ||
#include <vector> | ||
|
||
#include "codec/block_codecs.hpp" | ||
#include "util/util.hpp" | ||
|
||
extern "C" { | ||
#include "simdcomp/include/simdbitpacking.h" | ||
} | ||
|
||
namespace pisa { | ||
|
||
/**
 * Run-time polymorphic interface of a block codec.
 *
 * A codec encodes and decodes blocks of at most `block_size()` integers.
 */
class BlockCodec {
  public:
    /**
     * Virtual destructor: codecs are owned and destroyed through pointers
     * to this base class (e.g. `std::unique_ptr<BlockCodec>`), which is
     * undefined behavior without it.
     */
    virtual ~BlockCodec() = default;

    /**
     * Encodes `n` values starting at `in` and appends the bytes to `out`.
     *
     * \param in             Values to encode.
     * \param sum_of_values  Passed through to the implementation; some
     *                       codecs ignore it.
     * \param n              Number of values to encode.
     * \param out            Buffer the encoded bytes are appended to.
     */
    virtual void encode(
        std::uint32_t const* in, std::uint32_t sum_of_values, std::size_t n, std::vector<uint8_t>& out
    ) const = 0;

    /**
     * Decodes `n` values from `in` into `out`.
     *
     * \param in             Beginning of the encoded block.
     * \param out            Destination for the decoded values.
     * \param sum_of_values  Passed through to the implementation; some
     *                       codecs ignore it.
     * \param n              Number of values to decode.
     * \returns Pointer to the first byte after the decoded block.
     */
    virtual std::uint8_t const* decode(
        std::uint8_t const* in, std::uint32_t* out, std::uint32_t sum_of_values, std::size_t n
    ) const = 0;

    /** \returns The maximum number of values in a single block. */
    [[nodiscard]] virtual auto block_size() const noexcept -> std::size_t = 0;
};
|
||
class SimdBpBlockCodec: public BlockCodec { | ||
static constexpr std::uint64_t m_block_size = 128; | ||
|
||
void encode(uint32_t const* in, uint32_t sum_of_values, size_t n, std::vector<uint8_t>& out) const { | ||
assert(n <= m_block_size); | ||
auto* src = const_cast<uint32_t*>(in); | ||
if (n < m_block_size) { | ||
interpolative_block::encode(src, sum_of_values, n, out); | ||
return; | ||
} | ||
uint32_t b = maxbits(in); | ||
thread_local std::vector<uint8_t> buf(8 * n); | ||
uint8_t* buf_ptr = buf.data(); | ||
*buf_ptr++ = b; | ||
simdpackwithoutmask(src, (__m128i*)buf_ptr, b); | ||
out.insert(out.end(), buf.data(), buf.data() + b * sizeof(__m128i) + 1); | ||
} | ||
|
||
uint8_t const* decode(uint8_t const* in, uint32_t* out, uint32_t sum_of_values, size_t n) const { | ||
assert(n <= m_block_size); | ||
if PISA_UNLIKELY (n < m_block_size) { | ||
return interpolative_block::decode(in, out, sum_of_values, n); | ||
} | ||
uint32_t b = *in++; | ||
simdunpack((const __m128i*)in, out, b); | ||
return in + b * sizeof(__m128i); | ||
} | ||
|
||
auto block_size() const noexcept -> std::size_t { return m_block_size; } | ||
}; | ||
|
||
class Simple16BlockCodec: public BlockCodec { | ||
static constexpr std::uint64_t m_block_size = 128; | ||
|
||
void | ||
encode(uint32_t const* in, uint32_t /* sum_of_values */, size_t n, std::vector<uint8_t>& out) const { | ||
assert(n <= m_block_size); | ||
thread_local FastPForLib::Simple16<false> codec; | ||
thread_local std::array<std::uint8_t, 2 * 8 * m_block_size> buf{}; | ||
size_t out_len = buf.size(); | ||
codec.encodeArray(in, n, reinterpret_cast<uint32_t*>(buf.data()), out_len); | ||
out_len *= 4; | ||
out.insert(out.end(), buf.data(), buf.data() + out_len); | ||
} | ||
|
||
uint8_t const* decode(uint8_t const* in, uint32_t* out, uint32_t sum_of_values, size_t n) const { | ||
assert(n <= m_block_size); | ||
FastPForLib::Simple16<false> codec; | ||
std::array<std::uint32_t, 2 * m_block_size> buf{}; | ||
|
||
auto const* ret = reinterpret_cast<uint8_t const*>( | ||
codec.decodeArray(reinterpret_cast<uint32_t const*>(in), 8 * n, buf.data(), n) | ||
); | ||
|
||
std::copy(buf.begin(), std::next(buf.begin(), n), out); | ||
return ret; | ||
} | ||
|
||
auto block_size() const noexcept -> std::size_t { return m_block_size; } | ||
}; | ||
|
||
}; // namespace pisa |
Oops, something went wrong.