Test of dynamic dispatch for block codec

pisa-engine · Mar 23, 2024 · f1d5888 · f1d5888
1 parent ce97f7b
commit f1d5888
Show file tree

Hide file tree

Showing 4 changed files with 733 additions and 0 deletions.
diff --git a/include/pisa/block_inverted_index.hpp b/include/pisa/block_inverted_index.hpp
@@ -0,0 +1,307 @@
+#pragma once
+
+#include "bit_vector.hpp"
+#include "codec/block_codec.hpp"
+#include "codec/block_codecs.hpp"
+#include "codec/compact_elias_fano.hpp"
+#include "concepts.hpp"
+#include "concepts/inverted_index.hpp"
+#include "global_parameters.hpp"
+#include "mappable/mappable_vector.hpp"
+#include "mappable/mapper.hpp"
+#include "memory_source.hpp"
+
+namespace pisa {
+
+/**
+ * Cursor over a single block-compressed posting list whose blocks are decoded
+ * through a `BlockCodec` selected at run time.
+ *
+ * Byte layout read from `data`, as established by the constructor:
+ *  - the number of postings, read with `TightVariableByte::decode`;
+ *  - one 4-byte maximum document ID per block;
+ *  - one 4-byte endpoint per block except the first, giving the offset of the
+ *    block within the blocks data;
+ *  - the blocks data: for each block, the encoded document gaps immediately
+ *    followed by the encoded frequencies.
+ *
+ * Within a block, document IDs are stored as gaps minus one and frequencies as
+ * values minus one; the cursor adds the one back when reading them.
+ */
+class BlockInvertedIndexCursor {
+  public:
+    /**
+     * Opens the posting list starting at `data` and positions the cursor on its
+     * first posting.
+     *
+     * \param block_codec  Codec used to decode the blocks. Only the pointer is stored,
+     *                     so the codec must stay alive for as long as the cursor is used.
+     * \param data         First byte of the encoded posting list.
+     * \param universe     Value reported by `docid()` once the list is exhausted.
+     */
+    BlockInvertedIndexCursor(BlockCodec const* block_codec, std::uint8_t const* data, std::uint64_t universe)
+        : m_base(TightVariableByte::decode(data, &m_n, 1)),
+          m_blocks(ceil_div(m_n, block_codec->block_size())),
+          m_block_maxs(m_base),
+          m_block_endpoints(m_block_maxs + 4 * m_blocks),
+          m_blocks_data(m_block_endpoints + 4 * (m_blocks - 1)),
+          m_universe(universe),
+          m_block_codec(block_codec),
+          m_block_size(block_codec->block_size()) {
+        PISA_ASSERT_CONCEPT(
+            (concepts::FrequencyPostingCursor<BlockInvertedIndexCursor>
+             && concepts::SortedPostingCursor<BlockInvertedIndexCursor>)
+        );
+
+        m_docs_buf.resize(m_block_size);
+        m_freqs_buf.resize(m_block_size);
+        reset();
+    }
+
+    /** Moves the cursor back to the first posting by decoding the first block. */
+    void reset() { decode_docs_block(0); }
+
+    /**
+     * Advances to the next posting. When the last posting of the last block has been
+     * passed, `docid()` becomes the universe value given at construction.
+     */
+    void PISA_ALWAYSINLINE next() {
+        ++m_pos_in_block;
+        if PISA_UNLIKELY (m_pos_in_block == m_cur_block_size) {
+            if (m_cur_block + 1 == m_blocks) {
+                m_cur_docid = m_universe;
+                return;
+            }
+            decode_docs_block(m_cur_block + 1);
+        } else {
+            // The buffer holds gaps minus one, hence the `+ 1`.
+            m_cur_docid += m_docs_buf[m_pos_in_block] + 1;
+        }
+    }
+
+    /**
+     * Moves to the next document, counting from the current position,
+     * with the ID equal to or greater than `lower_bound`.
+     *
+     * In particular, if called with a value that is less than or equal
+     * to the current document ID, the position will not change.
+     *
+     * If `lower_bound` is greater than the maximum of the last block, `docid()`
+     * becomes the universe value given at construction.
+     */
+    void PISA_ALWAYSINLINE next_geq(uint64_t lower_bound) {
+        if PISA_UNLIKELY (lower_bound > m_cur_block_max) {
+            // binary search seems to perform worse here
+            if (lower_bound > block_max(m_blocks - 1)) {
+                m_cur_docid = m_universe;
+                return;
+            }
+
+            // Terminates because the last block's maximum is >= lower_bound (checked above).
+            uint64_t block = m_cur_block + 1;
+            while (block_max(block) < lower_bound) {
+                ++block;
+            }
+
+            decode_docs_block(block);
+        }
+
+        while (docid() < lower_bound) {
+            m_cur_docid += m_docs_buf[++m_pos_in_block] + 1;
+            assert(m_pos_in_block < m_cur_block_size);
+        }
+    }
+
+    /**
+     * Moves the cursor to the posting at index `pos` within the list.
+     * `pos` must not be smaller than the current position (checked with `assert`).
+     */
+    void PISA_ALWAYSINLINE move(uint64_t pos) {
+        assert(pos >= position());
+        uint64_t block = pos / m_block_size;
+        if PISA_UNLIKELY (block != m_cur_block) {
+            decode_docs_block(block);
+        }
+        while (position() < pos) {
+            m_cur_docid += m_docs_buf[++m_pos_in_block] + 1;
+        }
+    }
+
+    /** \returns  The current document ID, or the universe value once the list is exhausted. */
+    uint64_t docid() const { return m_cur_docid; }
+
+    /**
+     * \returns  The frequency of the current posting. The frequencies of the current
+     *           block are decoded on the first call after the block was entered.
+     */
+    uint64_t PISA_ALWAYSINLINE freq() {
+        if (!m_freqs_decoded) {
+            decode_freqs_block();
+        }
+        return m_freqs_buf[m_pos_in_block] + 1;
+    }
+
+    /** Same as `freq()`. */
+    uint64_t PISA_ALWAYSINLINE value() { return freq(); }
+
+    /** \returns  The index of the current posting within the list. */
+    uint64_t position() const { return m_cur_block * m_block_size + m_pos_in_block; }
+
+    /** \returns  The number of postings in the list. */
+    uint64_t size() const noexcept { return m_n; }
+
+    /** \returns  The number of blocks in the list. */
+    uint64_t num_blocks() const { return m_blocks; }
+
+    /**
+     * Decodes every block of the list and sums up the encoded size of the
+     * frequency parts.
+     *
+     * \returns  The number of bytes occupied by the encoded frequencies.
+     */
+    uint64_t stats_freqs_size() const {
+        // XXX rewrite in terms of get_blocks()
+        uint64_t bytes = 0;
+        uint8_t const* ptr = m_blocks_data;
+        // Must be a per-call local: a function-local `static` would be initialized only
+        // once and then reused by cursors whose codec has a different block size.
+        const uint64_t block_size = m_block_size;
+        std::vector<uint32_t> buf(block_size);
+        for (size_t b = 0; b < m_blocks; ++b) {
+            // Every block is full except possibly the last one.
+            uint32_t cur_block_size =
+                ((b + 1) * block_size <= size()) ? block_size : (size() % block_size);
+
+            // For the first block, uint32_t(-1) + 1 wraps around to 0.
+            uint32_t cur_base = (b != 0U ? block_max(b - 1) : uint32_t(-1)) + 1;
+            uint8_t const* freq_ptr = m_block_codec->decode(
+                ptr, buf.data(), block_max(b) - cur_base - (cur_block_size - 1), cur_block_size
+            );
+            ptr = m_block_codec->decode(freq_ptr, buf.data(), uint32_t(-1), cur_block_size);
+            bytes += ptr - freq_ptr;
+        }
+
+        return bytes;
+    }
+
+    /**
+     * Description of one encoded block: where its encoded documents and frequencies
+     * begin and end, together with the values needed to decode them.
+     * The pointers refer to the memory of the posting list the block was read from.
+     */
+    struct block_data {
+        uint32_t index;              // position of the block within the list
+        uint32_t max;                // maximum document ID of the block
+        uint32_t size;               // number of postings in the block
+        uint32_t doc_gaps_universe;  // third argument passed to the codec when decoding gaps
+        uint8_t const* docs_begin;   // first byte of the encoded document gaps
+        uint8_t const* freqs_begin;  // first byte of the encoded frequencies
+        uint8_t const* end;          // one past the last byte of the encoded frequencies
+        BlockCodec const* block_codec;
+
+        /** Appends the encoded document gaps, without decoding them, to `out`. */
+        void append_docs_block(std::vector<uint8_t>& out) const {
+            out.insert(out.end(), docs_begin, freqs_begin);
+        }
+
+        /** Appends the encoded frequencies, without decoding them, to `out`. */
+        void append_freqs_block(std::vector<uint8_t>& out) const {
+            out.insert(out.end(), freqs_begin, end);
+        }
+
+        /** Resizes `out` to the block size and decodes the document gaps into it. */
+        void decode_doc_gaps(std::vector<uint32_t>& out) const {
+            out.resize(size);
+            block_codec->decode(docs_begin, out.data(), doc_gaps_universe, size);
+        }
+
+        /** Resizes `out` to the block size and decodes the frequencies into it. */
+        void decode_freqs(std::vector<uint32_t>& out) const {
+            out.resize(size);
+            block_codec->decode(freqs_begin, out.data(), uint32_t(-1), size);
+        }
+    };
+
+    /**
+     * Decodes the whole list once to find the boundaries of each block.
+     *
+     * \returns  One `block_data` per block, in list order.
+     */
+    std::vector<block_data> get_blocks() {
+        std::vector<block_data> blocks;
+
+        uint8_t const* ptr = m_blocks_data;
+        // Must be a per-call local: a function-local `static` would be initialized only
+        // once and then reused by cursors whose codec has a different block size.
+        const uint64_t block_size = m_block_size;
+        std::vector<uint32_t> buf(block_size);
+        for (size_t b = 0; b < m_blocks; ++b) {
+            blocks.emplace_back();
+            // Every block is full except possibly the last one.
+            uint32_t cur_block_size =
+                ((b + 1) * block_size <= size()) ? block_size : (size() % block_size);
+
+            // For the first block, uint32_t(-1) + 1 wraps around to 0.
+            uint32_t cur_base = (b != 0U ? block_max(b - 1) : uint32_t(-1)) + 1;
+            uint32_t gaps_universe = block_max(b) - cur_base - (cur_block_size - 1);
+
+            blocks.back().index = b;
+            blocks.back().size = cur_block_size;
+            blocks.back().docs_begin = ptr;
+            blocks.back().doc_gaps_universe = gaps_universe;
+            blocks.back().max = block_max(b);
+            blocks.back().block_codec = m_block_codec;
+
+            // Decoding is the only way to learn where each encoded part ends.
+            uint8_t const* freq_ptr =
+                m_block_codec->decode(ptr, buf.data(), gaps_universe, cur_block_size);
+            blocks.back().freqs_begin = freq_ptr;
+            ptr = m_block_codec->decode(freq_ptr, buf.data(), uint32_t(-1), cur_block_size);
+            blocks.back().end = ptr;
+        }
+
+        assert(blocks.size() == num_blocks());
+        return blocks;
+    }
+
+  private:
+    /** \returns  The maximum document ID stored for the given block. */
+    uint32_t block_max(uint32_t block) const {
+        return reinterpret_cast<uint32_t const*>(m_block_maxs)[block];
+    }
+
+    /**
+     * Decodes the document gaps of `block` into the documents buffer and positions
+     * the cursor on the first posting of that block. Frequencies are not decoded here.
+     */
+    void PISA_NOINLINE decode_docs_block(uint64_t block) {
+        // Must be a per-call local: a function-local `static` would be initialized only
+        // once and then reused by cursors whose codec has a different block size.
+        const uint64_t block_size = m_block_size;
+        // The first block starts at offset 0; endpoints are stored for the remaining blocks.
+        uint32_t endpoint =
+            block != 0U ? reinterpret_cast<uint32_t const*>(m_block_endpoints)[block - 1] : 0;
+        uint8_t const* block_data = m_blocks_data + endpoint;
+        m_cur_block_size = ((block + 1) * block_size <= size()) ? block_size : (size() % block_size);
+        // For the first block, uint32_t(-1) + 1 wraps around to 0.
+        uint32_t cur_base = (block != 0U ? block_max(block - 1) : uint32_t(-1)) + 1;
+        m_cur_block_max = block_max(block);
+        m_freqs_block_data = m_block_codec->decode(
+            block_data, m_docs_buf.data(), m_cur_block_max - cur_base - (m_cur_block_size - 1), m_cur_block_size
+        );
+        intrinsics::prefetch(m_freqs_block_data);
+
+        // Turn the first gap into an absolute document ID.
+        m_docs_buf[0] += cur_base;
+
+        m_cur_block = block;
+        m_pos_in_block = 0;
+        m_cur_docid = m_docs_buf[0];
+        m_freqs_decoded = false;
+    }
+
+    /** Decodes the frequencies of the current block into the frequencies buffer. */
+    void PISA_NOINLINE decode_freqs_block() {
+        uint8_t const* next_block = m_block_codec->decode(
+            m_freqs_block_data, m_freqs_buf.data(), uint32_t(-1), m_cur_block_size
+        );
+        intrinsics::prefetch(next_block);
+        m_freqs_decoded = true;
+    }
+
+    // NOTE: members are initialized in declaration order and the constructor relies on
+    // it (`m_n` is written while `m_base` is being initialized); do not reorder.
+    uint32_t m_n{0};
+    uint8_t const* m_base;
+    uint32_t m_blocks;
+    uint8_t const* m_block_maxs;
+    uint8_t const* m_block_endpoints;
+    uint8_t const* m_blocks_data;
+    uint64_t m_universe;
+
+    uint32_t m_cur_block{0};
+    uint32_t m_pos_in_block{0};
+    uint32_t m_cur_block_max{0};
+    uint32_t m_cur_block_size{0};
+    uint32_t m_cur_docid{0};
+
+    uint8_t const* m_freqs_block_data{nullptr};
+    bool m_freqs_decoded{false};
+
+    std::vector<uint32_t> m_docs_buf;
+    std::vector<uint32_t> m_freqs_buf;
+    BlockCodec const* m_block_codec;
+    std::size_t m_block_size;
+};
+
+/**
+ * Inverted index whose posting lists are block-compressed and decoded through a
+ * `BlockCodec` chosen at run time.
+ *
+ * The index is mapped from a `MemorySource`; cursors returned by `operator[]`
+ * point into the mapped lists and borrow the codec owned by the index.
+ */
+class BlockInvertedIndex {
+  private:
+    global_parameters m_params;
+    std::size_t m_size{0};
+    std::size_t m_num_docs{0};
+    bit_vector m_endpoints;
+    mapper::mappable_vector<std::uint8_t> m_lists;
+    MemorySource m_source;
+    std::unique_ptr<BlockCodec> m_block_codec;
+
+  public:
+    using document_enumerator = BlockInvertedIndexCursor;
+
+    /**
+     * Takes ownership of `source` and `block_codec`, then maps the index
+     * structures from the source's data.
+     */
+    explicit BlockInvertedIndex(MemorySource source, std::unique_ptr<BlockCodec> block_codec)
+        : m_source(std::move(source)), m_block_codec(std::move(block_codec)) {
+        PISA_ASSERT_CONCEPT(
+            (concepts::SortedInvertedIndex<BlockInvertedIndex, BlockInvertedIndexCursor>)
+        );
+        mapper::map(*this, m_source.data(), mapper::map_flags::warmup);
+    }
+
+    /** Presents the mapped members, each with its name, to `visit`. */
+    template <typename Visitor>
+    void map(Visitor& visit) {
+        visit(m_params, "m_params")(m_size, "m_size")(m_num_docs, "m_num_docs")(
+            m_endpoints, "m_endpoints")(m_lists, "m_lists");
+    }
+
+    /**
+     * \returns  A cursor over the posting list of `term_id`. The cursor uses the
+     *           codec and the list data owned by this index. `term_id` is not
+     *           range-checked.
+     */
+    [[nodiscard]] auto operator[](std::size_t term_id) const -> BlockInvertedIndexCursor {
+        // check_term_range(term_id);
+        compact_elias_fano::enumerator endpoints(m_endpoints, 0, m_lists.size(), m_size, m_params);
+        auto endpoint = endpoints.move(term_id).second;
+        return BlockInvertedIndexCursor(m_block_codec.get(), m_lists.data() + endpoint, num_docs());
+    }
+
+    /**
+     * \returns  The size of the index, i.e., the number of terms (posting lists).
+     */
+    [[nodiscard]] std::size_t size() const noexcept { return m_size; }
+
+    /**
+     * \returns  The number of distinct documents in the index.
+     */
+    [[nodiscard]] std::uint64_t num_docs() const noexcept { return m_num_docs; }
+
+    /**
+     * Reads every byte of the posting list of `term_id`. `term_id` is not
+     * range-checked.
+     */
+    void warmup(std::size_t term_id) const {
+        // check_term_range(term_id);
+        compact_elias_fano::enumerator endpoints(m_endpoints, 0, m_lists.size(), m_size, m_params);
+
+        auto begin = endpoints.move(term_id).second;
+        // The last list extends to the end of the lists data.
+        auto end = m_lists.size();
+        if (term_id + 1 != size()) {
+            end = endpoints.move(term_id + 1).second;
+        }
+
+        // `volatile` keeps the reads from being optimized away. Initialized so that
+        // the final read below is well-defined even when the range is empty.
+        volatile std::uint32_t tmp = 0;
+        for (std::size_t i = begin; i != end; ++i) {
+            tmp = m_lists[i];
+        }
+        (void)tmp;
+    }
+};
+
+};  // namespace pisa
diff --git a/include/pisa/codec/block_codec.hpp b/include/pisa/codec/block_codec.hpp
@@ -0,0 +1,59 @@
+#pragma once
+
+#include <cstdint>
+#include <vector>
+
+#include "codec/block_codecs.hpp"
+#include "util/util.hpp"
+
+extern "C" {
+#include "simdcomp/include/simdbitpacking.h"
+}
+
+namespace pisa {
+
+/**
+ * Interface of a codec that encodes and decodes blocks of 32-bit integers,
+ * allowing the codec to be selected at run time.
+ */
+class BlockCodec {
+  public:
+    // Codecs are owned and destroyed through pointers to this base class,
+    // which requires a virtual destructor.
+    virtual ~BlockCodec() = default;
+
+    /**
+     * Encodes the `n` values starting at `in` and appends the result to `out`.
+     *
+     * \param sum_of_values  Value forwarded to the implementation along with the
+     *                       block; see the implementations for how it is used.
+     */
+    virtual void encode(
+        std::uint32_t const* in, std::uint32_t sum_of_values, std::size_t n, std::vector<uint8_t>& out
+    ) const = 0;
+
+    /**
+     * Decodes `n` values from `in` into `out`.
+     *
+     * \returns  Pointer to the first byte after the decoded block.
+     */
+    virtual std::uint8_t const* decode(
+        std::uint8_t const* in, std::uint32_t* out, std::uint32_t sum_of_values, std::size_t n
+    ) const = 0;
+
+    /** \returns  The number of values in a full block. */
+    [[nodiscard]] virtual auto block_size() const noexcept -> std::size_t = 0;
+};
+
+/**
+ * Block codec with blocks of 128 values.
+ *
+ * Full blocks are stored as one byte holding the bit width followed by the values
+ * packed with simdcomp. Shorter blocks are delegated to `interpolative_block`.
+ */
+class SimdBpBlockCodec: public BlockCodec {
+    static constexpr std::uint64_t m_block_size = 128;
+
+  public:
+    /**
+     * Encodes `n` values (at most one block) from `in` and appends them to `out`.
+     * `sum_of_values` is only used for blocks shorter than the block size.
+     */
+    void encode(uint32_t const* in, uint32_t sum_of_values, size_t n, std::vector<uint8_t>& out) const override {
+        assert(n <= m_block_size);
+        // The callees take non-const pointers.
+        auto* src = const_cast<uint32_t*>(in);
+        if (n < m_block_size) {
+            interpolative_block::encode(src, sum_of_values, n, out);
+            return;
+        }
+        uint32_t b = maxbits(in);
+        // Scratch buffer reused across calls on the same thread; only full blocks
+        // reach this point, so it is sized for a full block.
+        thread_local std::vector<uint8_t> buf(8 * m_block_size);
+        uint8_t* buf_ptr = buf.data();
+        // First byte: bit width used for every value of the block.
+        *buf_ptr++ = static_cast<uint8_t>(b);
+        simdpackwithoutmask(src, reinterpret_cast<__m128i*>(buf_ptr), b);
+        out.insert(out.end(), buf.data(), buf.data() + b * sizeof(__m128i) + 1);
+    }
+
+    /**
+     * Decodes `n` values (at most one block) from `in` into `out`.
+     * `sum_of_values` is only used for blocks shorter than the block size.
+     *
+     * \returns  Pointer to the first byte after the decoded block.
+     */
+    uint8_t const* decode(uint8_t const* in, uint32_t* out, uint32_t sum_of_values, size_t n) const override {
+        assert(n <= m_block_size);
+        if PISA_UNLIKELY (n < m_block_size) {
+            return interpolative_block::decode(in, out, sum_of_values, n);
+        }
+        // First byte: bit width used for every value of the block.
+        uint32_t b = *in++;
+        simdunpack(reinterpret_cast<const __m128i*>(in), out, b);
+        return in + b * sizeof(__m128i);
+    }
+
+    /** \returns  The number of values in a full block (128). */
+    auto block_size() const noexcept -> std::size_t override { return m_block_size; }
+};
+
+};  // namespace pisa
diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt
@@ -10,6 +10,7 @@ target_link_libraries(app pisa CLI11)
 add_tool(compress_inverted_index compress_inverted_index.cpp)
 add_tool(create_wand_data create_wand_data.cpp)
 add_tool(queries queries.cpp)
+add_tool(queries_dynamic queries_dynamic.cpp)
 add_tool(evaluate_queries evaluate_queries.cpp)
 add_tool(thresholds thresholds.cpp)
 add_tool(profile_queries profile_queries.cpp)