Skip to content

Commit

Permalink
Improve resolving block codecs
Browse files Browse the repository at this point in the history
  • Loading branch information
elshize committed Mar 31, 2024
1 parent 599dc04 commit f20581b
Show file tree
Hide file tree
Showing 13 changed files with 102 additions and 46 deletions.
12 changes: 12 additions & 0 deletions include/pisa/codec/block_codec_registry.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#pragma once

#include <memory>
#include <string_view>

#include "codec/block_codec.hpp"

namespace pisa {

/// Returns a newly constructed block codec registered under `name`
/// (e.g., "block_simdbp").
///
/// Throws `std::domain_error` if `name` does not match any registered codec.
[[nodiscard]] auto get_block_codec(std::string_view name) -> std::unique_ptr<BlockCodec>;

} // namespace pisa
3 changes: 3 additions & 0 deletions include/pisa/codec/interpolative.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#pragma once

#include <string_view>
#include <vector>

#include "codec/block_codec.hpp"
Expand All @@ -10,6 +11,8 @@ class InterpolativeBlockCodec: public BlockCodec {
static constexpr std::uint64_t m_block_size = 128;

public:
constexpr static std::string_view name = "block_interpolative";

void encode(uint32_t const* in, uint32_t sum_of_values, size_t n, std::vector<uint8_t>& out) const;
uint8_t const* decode(uint8_t const* in, uint32_t* out, uint32_t sum_of_values, size_t n) const;
auto block_size() const noexcept -> std::size_t { return m_block_size; }
Expand Down
2 changes: 2 additions & 0 deletions include/pisa/codec/maskedvbyte.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ class MaskedVByteBlockCodec: public BlockCodec {
static constexpr std::uint64_t m_overflow = 512;

public:
constexpr static std::string_view name = "block_maskedvbyte";

void encode(uint32_t const* in, uint32_t sum_of_values, size_t n, std::vector<uint8_t>& out) const;
uint8_t const* decode(uint8_t const* in, uint32_t* out, uint32_t sum_of_values, size_t n) const;
auto block_size() const noexcept -> std::size_t { return m_block_size; }
Expand Down
2 changes: 2 additions & 0 deletions include/pisa/codec/optpfor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ class OptPForBlockCodec: public BlockCodec {
static const uint64_t m_block_size = Codec::BlockSize;

public:
constexpr static std::string_view name = "block_optpfor";

void encode(uint32_t const* in, uint32_t sum_of_values, size_t n, std::vector<uint8_t>& out) const;
uint8_t const* decode(uint8_t const* in, uint32_t* out, uint32_t sum_of_values, size_t n) const;
auto block_size() const noexcept -> std::size_t { return m_block_size; }
Expand Down
2 changes: 2 additions & 0 deletions include/pisa/codec/qmx.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ class QmxBlockCodec: public BlockCodec {
static constexpr std::uint64_t m_overflow = 512;

public:
constexpr static std::string_view name = "block_qmx";

void encode(uint32_t const* in, uint32_t sum_of_values, size_t n, std::vector<uint8_t>& out) const;
uint8_t const* decode(uint8_t const* in, uint32_t* out, uint32_t sum_of_values, size_t n) const;
auto block_size() const noexcept -> std::size_t { return m_block_size; }
Expand Down
2 changes: 2 additions & 0 deletions include/pisa/codec/simdbp.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ class SimdBpBlockCodec: public BlockCodec {
static constexpr std::uint64_t m_block_size = 128;

public:
constexpr static std::string_view name = "block_simdbp";

void encode(uint32_t const* in, uint32_t sum_of_values, size_t n, std::vector<uint8_t>& out) const;
uint8_t const* decode(uint8_t const* in, uint32_t* out, uint32_t sum_of_values, size_t n) const;
auto block_size() const noexcept -> std::size_t { return m_block_size; }
Expand Down
2 changes: 2 additions & 0 deletions include/pisa/codec/simple16.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ class Simple16BlockCodec: public BlockCodec {
static constexpr std::uint64_t m_block_size = 128;

public:
constexpr static std::string_view name = "block_simple16";

void encode(uint32_t const* in, uint32_t sum_of_values, size_t n, std::vector<uint8_t>& out) const;
uint8_t const* decode(uint8_t const* in, uint32_t* out, uint32_t sum_of_values, size_t n) const;
auto block_size() const noexcept -> std::size_t { return m_block_size; }
Expand Down
2 changes: 2 additions & 0 deletions include/pisa/codec/simple8b.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ class Simple8bBlockCodec: public BlockCodec {
static constexpr std::uint64_t m_block_size = 128;

public:
constexpr static std::string_view name = "block_simple8b";

void encode(uint32_t const* in, uint32_t sum_of_values, size_t n, std::vector<uint8_t>& out) const;
uint8_t const* decode(uint8_t const* in, uint32_t* out, uint32_t sum_of_values, size_t n) const;
auto block_size() const noexcept -> std::size_t { return m_block_size; }
Expand Down
3 changes: 3 additions & 0 deletions include/pisa/codec/streamvbyte.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include <array>
#include <cassert>
#include <cstdint>
#include <string_view>
#include <vector>

#include "codec/block_codec.hpp"
Expand Down Expand Up @@ -43,6 +44,8 @@ class StreamVByteBlockCodec: public BlockCodec {
pisa::streamvbyte_max_compressedbytes(m_block_size);

public:
constexpr static std::string_view name = "block_streamvbyte";

void encode(uint32_t const* in, uint32_t sum_of_values, size_t n, std::vector<uint8_t>& out) const;
uint8_t const* decode(uint8_t const* in, uint32_t* out, uint32_t sum_of_values, size_t n) const;
auto block_size() const noexcept -> std::size_t { return m_block_size; }
Expand Down
3 changes: 3 additions & 0 deletions include/pisa/codec/varint_g8iu.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#pragma once

#include <string_view>
#include <vector>

#include "codec/block_codec.hpp"
Expand All @@ -10,6 +11,8 @@ class VarintG8IUBlockCodec: public BlockCodec {
static const uint64_t m_block_size = 128;

public:
constexpr static std::string_view name = "block_varintg8iu";

void encode(uint32_t const* in, uint32_t sum_of_values, size_t n, std::vector<uint8_t>& out) const;
uint8_t const* decode(uint8_t const* in, uint32_t* out, uint32_t sum_of_values, size_t n) const;
auto block_size() const noexcept -> std::size_t { return m_block_size; }
Expand Down
2 changes: 2 additions & 0 deletions include/pisa/codec/varintgb.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,8 @@ class VarintGbBlockCodec: public BlockCodec {
static constexpr std::uint64_t m_block_size = 128;

public:
constexpr static std::string_view name = "block_varintgb";

void encode(uint32_t const* in, uint32_t sum_of_values, size_t n, std::vector<uint8_t>& out) const;
uint8_t const* decode(uint8_t const* in, uint32_t* out, uint32_t sum_of_values, size_t n) const;
auto block_size() const noexcept -> std::size_t { return m_block_size; }
Expand Down
64 changes: 64 additions & 0 deletions src/codec/block_codec_registry.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#include "codec/block_codec_registry.hpp"

#include <algorithm>
#include <array>
#include <cstddef>
#include <iterator>
#include <memory>
#include <stdexcept>
#include <string_view>

#include <fmt/format.h>

#include "codec/block_codec.hpp"
#include "codec/interpolative.hpp"
#include "codec/maskedvbyte.hpp"
#include "codec/optpfor.hpp"
#include "codec/qmx.hpp"
#include "codec/simdbp.hpp"
#include "codec/simple16.hpp"
#include "codec/simple8b.hpp"
#include "codec/streamvbyte.hpp"
#include "codec/varint_g8iu.hpp"
#include "codec/varintgb.hpp"

namespace pisa {

/**
 * Registry of block codecs fixed at compile time and resolved by name at run time.
 *
 * Each type in `C` must expose a static member `name` usable to initialize a
 * `std::string_view`, and must be constructible with no arguments through
 * `std::make_unique<C>()` into a `std::unique_ptr<BlockCodec>`.
 */
template <typename... C>
class BlockCodecRegistry {
    using BlockCodecConstructor = std::unique_ptr<BlockCodec> (*)();

    /// Codec names, in the same order as the template arguments.
    constexpr static std::array<std::string_view, sizeof...(C)> m_names =
        std::array<std::string_view, sizeof...(C)>{C::name...};

    /// Factory functions; `m_constructors[i]` builds the codec named `m_names[i]`.
    constexpr static std::array<BlockCodecConstructor, sizeof...(C)> m_constructors =
        std::array<BlockCodecConstructor, sizeof...(C)>{[]() -> std::unique_ptr<BlockCodec> {
            return std::make_unique<C>();
        }...};

  public:
    /// Returns the number of registered codecs.
    [[nodiscard]] constexpr static auto count() -> std::size_t { return sizeof...(C); }

    /**
     * Constructs the codec registered under `name`.
     *
     * The lookup is a linear scan over the registered names.
     *
     * @throws std::domain_error if no registered codec has this name.
     */
    [[nodiscard]] static auto get(std::string_view name) -> std::unique_ptr<BlockCodec> {
        auto const pos = std::find(m_names.begin(), m_names.end(), name);
        if (pos == m_names.end()) {
            throw std::domain_error(fmt::format("invalid codec: {}", name));
        }
        // `pos` is never before `begin()`, so the distance is non-negative and the
        // conversion to the array's unsigned index type is lossless.
        auto const index = static_cast<std::size_t>(std::distance(m_names.begin(), pos));
        return m_constructors[index]();
    }
};

/// The set of block codecs that `get_block_codec` can resolve by name.
/// To make another codec resolvable, add its type to this list.
using BlockCodecs = BlockCodecRegistry<
InterpolativeBlockCodec,
MaskedVByteBlockCodec,
OptPForBlockCodec,
QmxBlockCodec,
SimdBpBlockCodec,
Simple16BlockCodec,
Simple8bBlockCodec,
StreamVByteBlockCodec,
VarintG8IUBlockCodec,
VarintGbBlockCodec>;

/// Looks up `name` in the `BlockCodecs` registry and returns a newly constructed codec.
/// Propagates the `std::domain_error` thrown by the registry when the name is unknown.
auto get_block_codec(std::string_view name) -> std::unique_ptr<BlockCodec> {
return BlockCodecs::get(name);
}

} // namespace pisa
49 changes: 3 additions & 46 deletions tools/queries_dynamic.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include <algorithm>
#include <functional>
#include <iostream>
#include <numeric>
#include <optional>
Expand All @@ -16,17 +17,7 @@
#include "accumulator/simple_accumulator.hpp"
#include "app.hpp"
#include "block_inverted_index.hpp"
#include "codec/block_codec.hpp"
#include "codec/interpolative.hpp"
#include "codec/maskedvbyte.hpp"
#include "codec/optpfor.hpp"
#include "codec/qmx.hpp"
#include "codec/simdbp.hpp"
#include "codec/simple16.hpp"
#include "codec/simple8b.hpp"
#include "codec/streamvbyte.hpp"
#include "codec/varint_g8iu.hpp"
#include "codec/varintgb.hpp"
#include "codec/block_codec_registry.hpp"
#include "cursor/block_max_scored_cursor.hpp"
#include "cursor/cursor.hpp"
#include "cursor/max_scored_cursor.hpp"
Expand Down Expand Up @@ -319,40 +310,6 @@ using wand_raw_index = wand_data<wand_data_raw>;
using wand_uniform_index = wand_data<wand_data_compressed<>>;
using wand_uniform_index_quantized = wand_data<wand_data_compressed<PayloadType::Quantized>>;

/// Maps an encoding name to a newly constructed block codec.
///
/// Throws `std::domain_error` when `encoding` is not one of the names handled below.
auto resolve_codec(std::string_view encoding) -> std::unique_ptr<BlockCodec> {
    std::unique_ptr<BlockCodec> codec;
    if (encoding == "block_interpolative") {
        codec = std::make_unique<InterpolativeBlockCodec>();
    } else if (encoding == "block_maskedvbyte") {
        codec = std::make_unique<MaskedVByteBlockCodec>();
    } else if (encoding == "block_optpfor") {
        codec = std::make_unique<OptPForBlockCodec>();
    } else if (encoding == "block_qmx") {
        codec = std::make_unique<QmxBlockCodec>();
    } else if (encoding == "block_simdbp") {
        codec = std::make_unique<SimdBpBlockCodec>();
    } else if (encoding == "block_simple16") {
        codec = std::make_unique<Simple16BlockCodec>();
    } else if (encoding == "block_simple8b") {
        codec = std::make_unique<Simple8bBlockCodec>();
    } else if (encoding == "block_streamvbyte") {
        codec = std::make_unique<StreamVByteBlockCodec>();
    } else if (encoding == "block_varintg8iu") {
        codec = std::make_unique<VarintG8IUBlockCodec>();
    } else if (encoding == "block_varintgb") {
        codec = std::make_unique<VarintGbBlockCodec>();
    } else {
        // No codec matched the requested encoding name.
        throw std::domain_error("invalid encoding type");
    }
    return codec;
}

int main(int argc, const char** argv) {
bool extract = false;
bool safe = false;
Expand All @@ -379,7 +336,7 @@ int main(int argc, const char** argv) {
}

BlockInvertedIndex index(
MemorySource::mapped_file(app.index_filename()), resolve_codec(app.index_encoding())
MemorySource::mapped_file(app.index_filename()), get_block_codec(app.index_encoding())
);

auto params = std::make_tuple(
Expand Down

0 comments on commit f20581b

Please sign in to comment.