Skip to content

Commit

Permalink
add doc comments and testcases for uniform_key_distribution
Browse files Browse the repository at this point in the history
  • Loading branch information
kuron99 committed Dec 17, 2024
1 parent 5571348 commit b2a0ed2
Show file tree
Hide file tree
Showing 3 changed files with 94 additions and 25 deletions.
40 changes: 20 additions & 20 deletions src/jogasaki/dist/uniform_key_distribution.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,20 +88,6 @@ status uniform_key_distribution::highkey(pivot_type& out) {
return scan_one(true, out);
}

/**
 * @brief calculate a signed, weighted distance between two octet strings
 * @details the octets at position i of both strings are subtracted (hi - lo) and the difference
 * is weighted by 1/256^i, i.e. the first octet has weight 1, the second 1/256 and so on.
 * Positions beyond the end of the shorter string are treated as zero octets.
 * @param lo the string to measure from
 * @param hi the string to measure to
 * @return the sum of the weighted per-octet differences (0 if both strings are empty)
 */
double string_distance(std::string_view lo, std::string_view hi) {
    // Octets have to be read as unsigned values. Plain `char` may be signed, in which case
    // bytes >= 0x80 would turn negative and the result would contradict the ordering
    // used by std::string_view comparison.
    auto octet_at = [](std::string_view s, std::size_t pos) -> int {
        return pos < s.size() ? static_cast<int>(static_cast<unsigned char>(s[pos])) : 0;
    };

    std::size_t const len = std::max(lo.size(), hi.size());
    double dist{};
    double weight = 1.0;
    for (std::size_t i = 0; i < len; ++i) {
        dist += (octet_at(hi, i) - octet_at(lo, i)) * weight;
        weight /= 256.0;
    }
    return dist;
}

std::size_t common_prefix_len(std::string_view lo, std::string_view hi) {
std::size_t len = std::min(lo.size(), hi.size());
std::size_t i = 0;
Expand All @@ -115,21 +101,35 @@ std::size_t common_prefix_len(std::string_view lo, std::string_view hi) {
}

std::vector<std::string> generate_strings(std::string_view lo, std::string_view hi, std::size_t chars) {
// simple implementation to generate a set of strings contained in the range from lo to hi (exclusive)
// steps are as follows:
// 1. let l and h be the character after the common prefix in lo and hi
// 2. generate strings with prefix + l, prefix + (l + 1), ..., prefix + (h - 1)
// 3. append one of `chars` characters to each string generated in 2
// 4. Within the `chars` * (h - l) strings generated above, adopt only ones in the range from lo to hi
if(hi < lo) {
// invalid arguments
return {};
}
auto cpl = common_prefix_len(lo, hi);

// characters after the common prefix in lo and hi
auto h = static_cast<std::uint8_t>(cpl < hi.size() ? hi[cpl] : 0);
auto l = static_cast<std::uint8_t>(cpl < lo.size() ? lo[cpl] : 0);
std::size_t cnt = h - l;

std::vector<std::string> pivots{};
pivots.reserve(cnt * chars);
pivots.reserve(cnt * chars); // at maximum

for (std::size_t j = 0; j < cnt; ++j) {
for (std::size_t i = 0; i < cnt; ++i) {
std::string prefix{lo.data(), cpl};
prefix += static_cast<char>(l + j);

for (std::size_t i = 0; i < chars; ++i) {
pivots.emplace_back(prefix + static_cast<char>(i));
prefix += static_cast<char>(l + i);
for (std::size_t j = 0; j < chars; ++j) {
std::string p = prefix + static_cast<char>(j);
if(p <= lo || hi <= p) {
continue;
}
pivots.emplace_back(std::move(p));
}
}
return pivots;
Expand Down
19 changes: 16 additions & 3 deletions src/jogasaki/dist/uniform_key_distribution.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ namespace jogasaki::dist {

/**
* @brief key_distribution subclass assuming keys are distributed uniformly
* @details This class assumes that keys are distributed uniformly and calculates the pivots based on this assumption.
* @details This class assumes that keys are distributed uniformly and calculates the pivots
* between the smallest and largest keys.
*/
class uniform_key_distribution : public key_distribution {
public:
Expand Down Expand Up @@ -87,10 +88,22 @@ class uniform_key_distribution : public key_distribution {
status scan_one(bool reverse, uniform_key_distribution::pivot_type& out);
};

/**
 * @brief calculate a weighted distance between two strings
 * @details the per-position differences (hi - lo) are summed, where the first position has
 * weight 1 and each following position has 1/256 of the weight of the previous one.
 * Positions missing in the shorter string are treated as 0.
 * @param lo the string to measure from
 * @param hi the string to measure to
 * @return the weighted sum of the per-position differences
 */
double string_distance(std::string_view lo, std::string_view hi);

/**
 * @brief calculate the common prefix length of two strings
 * @param lo the first string
 * @param hi the second string
 * @return the number of leading characters shared by lo and hi
 * (e.g. 3 for "abcd" and "abc")
 * @note the function is public for testing
 */
std::size_t common_prefix_len(std::string_view lo, std::string_view hi);

/**
 * @brief generate strings between two strings
 * @param lo the smaller string
 * @param hi the larger string
 * @param chars the number of distinct octet values (0, 1, ..., chars - 1) tried for the octet
 * appended to each candidate (normally 256, but customizable for testing)
 * @details the function generates strings that lie strictly between lo and hi
 * (both bounds are excluded).
 * If the given range is too narrow or invalid (i.e. hi < lo), the function returns an empty vector.
 * @return the generated strings
 * @note the function is public for testing
 */
std::vector<std::string>
generate_strings(std::string_view lo, std::string_view hi, std::size_t chars = 256);

Expand Down
60 changes: 58 additions & 2 deletions test/jogasaki/dist/uniform_distribution_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@ class uniform_distribution_test :
};

TEST_F(uniform_distribution_test, basic) {
if (jogasaki::kvs::implementation_id() == "memory") {
GTEST_SKIP() << "jogasaki-memory doesn't support uniform key distribution yet";
}
execute_statement("create table t (c0 int primary key)");
execute_statement("insert into t values (1),(2),(3)");

Expand All @@ -69,12 +72,17 @@ TEST_F(uniform_distribution_test, basic) {
std::string hi{};
std::string lo{};
EXPECT_EQ(status::ok, dist.highkey(hi));
EXPECT_EQ("\x80\x00\x00\x03"sv, hi);
std::cerr << "highkey: " << utils::binary_printer{hi.data(), hi.size()} << std::endl;
EXPECT_EQ(status::ok, dist.lowkey(lo));
EXPECT_EQ("\x80\x00\x00\x01"sv, lo);
std::cerr << "lowkey: " << utils::binary_printer{lo.data(), lo.size()} << std::endl;
}

TEST_F(uniform_distribution_test, complex_primary_key) {
if (jogasaki::kvs::implementation_id() == "memory") {
GTEST_SKIP() << "jogasaki-memory doesn't support uniform key distribution yet";
}
execute_statement("create table t (c0 int, c1 int, primary key(c0, c1))");
execute_statement("insert into t values (1,10),(2,20),(3,30)");

Expand All @@ -91,8 +99,10 @@ TEST_F(uniform_distribution_test, complex_primary_key) {
std::string lo{};
EXPECT_EQ(status::ok, dist.highkey(hi));
std::cerr << "highkey: " << utils::binary_printer{hi.data(), hi.size()} << std::endl;
EXPECT_EQ("\x80\x00\x00\x03\x80\x00\x00\x1e"sv, hi);
EXPECT_EQ(status::ok, dist.lowkey(lo));
std::cerr << "lowkey: " << utils::binary_printer{lo.data(), lo.size()} << std::endl;
EXPECT_EQ("\x80\x00\x00\x01\x80\x00\x00\x0a"sv, lo);
}

TEST_F(uniform_distribution_test, common_prefix_len) {
Expand All @@ -110,9 +120,9 @@ TEST_F(uniform_distribution_test, common_prefix_len) {
EXPECT_EQ(3, common_prefix_len("abcd", "abc"));
}

TEST_F(uniform_distribution_test, gen_strings) {
TEST_F(uniform_distribution_test, gen_strings_basic) {
auto res = generate_strings("a1", "a3", 3);
EXPECT_EQ(6, res.size());
ASSERT_EQ(6, res.size());
EXPECT_EQ("a1\x00"sv, res[0]);
EXPECT_EQ("a1\x01"sv, res[1]);
EXPECT_EQ("a1\x02"sv, res[2]);
Expand All @@ -121,6 +131,52 @@ TEST_F(uniform_distribution_test, gen_strings) {
EXPECT_EQ("a2\x02"sv, res[5]);
}

TEST_F(uniform_distribution_test, gen_strings_removing_ones_outside_range) {
    // Same bounds as gen_strings_basic except that lo is raised to "a1\x01":
    // candidates that are not strictly greater than lo must be missing from the result.
    std::vector<std::string_view> const expected{
        "a1\x02"sv,
        "a2\x00"sv,
        "a2\x01"sv,
        "a2\x02"sv,
    };
    auto const actual = generate_strings("a1\x01"sv, "a3"sv, 3);
    ASSERT_EQ(expected.size(), actual.size());
    for (std::size_t idx = 0; idx < expected.size(); ++idx) {
        EXPECT_EQ(expected[idx], actual[idx]);
    }
}

TEST_F(uniform_distribution_test, gen_strings_with_different_length) {
    // lo ("a") is a proper prefix of hi ("a\x02"), so the bounds differ in length
    std::vector<std::string_view> const expected{
        "a\x00\x00"sv,
        "a\x00\x01"sv,
        "a\x00\x02"sv,
        "a\x01\x00"sv,
        "a\x01\x01"sv,
        "a\x01\x02"sv,
    };
    auto const actual = generate_strings("a"sv, "a\x02"sv, 3);
    ASSERT_EQ(expected.size(), actual.size());
    for (std::size_t idx = 0; idx < expected.size(); ++idx) {
        EXPECT_EQ(expected[idx], actual[idx]);
    }
}

TEST_F(uniform_distribution_test, gen_strings_with_different_length_longer_lo) {
    // lo ("a\x01") is longer than hi ("b"); exactly one string is expected in between
    auto const pivots = generate_strings("a\x01"sv, "b"sv, 3);
    ASSERT_EQ(1, pivots.size());
    EXPECT_EQ("a\x02"sv, pivots.front());
}

TEST_F(uniform_distribution_test, gen_strings_same_hi_lo) {
    // identical bounds describe an empty range, so no string is expected
    auto const bound = "abc"sv;
    ASSERT_EQ(0, generate_strings(bound, bound, 3).size());
}

TEST_F(uniform_distribution_test, gen_strings_narrow_range) {
    auto const hi = "a\x02"sv;

    // lo ends with octet 0xFF: the range is too narrow to generate any strings
    auto const none = generate_strings("a\x01\xFF"sv, hi, 256);
    ASSERT_EQ(0, none.size());

    // lo ends with octet 0xFE: the range is narrow and only one string can be generated
    auto const single = generate_strings("a\x01\xFE"sv, hi, 256);
    ASSERT_EQ(1, single.size());
    EXPECT_EQ("a\x01\xFF"sv, single.front());
}

TEST_F(uniform_distribution_test, compute_pivots) {
if (jogasaki::kvs::implementation_id() == "memory") {
GTEST_SKIP() << "jogasaki-memory doesn't support uniform key distribution yet";
Expand Down

0 comments on commit b2a0ed2

Please sign in to comment.