Skip to content

Commit

Permalink
Merge branch 'sideplugin-8.04.0-2023-06-20-2926e071' into sideplugin-…
Browse files Browse the repository at this point in the history
…8.11.0-2023-12-17-5b981b64
  • Loading branch information
rockeet committed Dec 23, 2023
2 parents 6d789a8 + 8db0b8d commit 98320a2
Show file tree
Hide file tree
Showing 21 changed files with 123 additions and 40 deletions.
2 changes: 1 addition & 1 deletion db/builder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -422,7 +422,7 @@ Status BuildTable(
if (s.ok() && !output_validator.CompareValidator(file_validator)) {
#if !defined(ROCKSDB_UNIT_TEST)
auto& fd = meta->fd;
ROCKSDB_DIE("BuildTable: Paranoid checksums do not match(%d/%lld.sst)",
ROCKSDB_DIE("BuildTable: Paranoid checksums do not match(%d:%lld.sst)",
fd.GetPathId(), (long long)fd.GetNumber());
#else
s = Status::Corruption("BuildTable: Paranoid checksums do not match");
Expand Down
2 changes: 1 addition & 1 deletion db/dbformat.cc
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@ LookupKey::LookupKey(const Slice& _user_key, SequenceNumber s,
memcpy(dst, buf, klen_len); dst += klen_len;
memcpy(dst, _user_key.data(), usize);
dst += usize;
if (nullptr != ts) {
if (UNLIKELY(nullptr != ts)) {
memcpy(dst, ts->data(), ts_sz);
dst += ts_sz;
}
Expand Down
61 changes: 35 additions & 26 deletions db/internal_stats.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,11 @@
#include "util/hash_containers.h"
#include "util/string_util.h"

#if defined(__GNUC__)
#pragma GCC diagnostic ignored "-Wnonnull" // for boost::replace_all_copy
#endif
#include<boost/algorithm/string.hpp>

namespace ROCKSDB_NAMESPACE {


Expand Down Expand Up @@ -1729,26 +1734,42 @@ void InternalStats::DumpDBMapStatsWriteStall(
}
}

static void DumpWriteStalls(std::ostringstream& str,
std::map<std::string, std::string>& stats_map) {
str << "Write Stall (count): ";

for (auto iter = stats_map.begin(); iter != stats_map.end(); ) {
std::string name = boost::replace_all_copy(iter->first, "-delays", "");
str << name << ": delays " << iter->second;
++iter;
if (stats_map.end() == iter) {
break; // should not goes here, check for safe
}
std::string name2 = boost::replace_all_copy(iter->first, "-stops", "");
if (name2 == name) {
str << ", stops " << iter->second;
}
else { // should not goes here
str << iter->first << ": " << iter->second;
}
auto next = std::next(iter);
if (stats_map.end() == next) {
str << "\n";
} else {
str << " | ";
}
iter = next;
}
}

void InternalStats::DumpDBStatsWriteStall(std::string* value) {
assert(value);

std::map<std::string, std::string> write_stall_stats_map;
DumpDBMapStatsWriteStall(&write_stall_stats_map);

std::ostringstream str;
str << "Write Stall (count): ";

for (auto write_stall_stats_map_iter = write_stall_stats_map.begin();
write_stall_stats_map_iter != write_stall_stats_map.end();
write_stall_stats_map_iter++) {
const auto& name_and_stat = *write_stall_stats_map_iter;
str << name_and_stat.first << ": " << name_and_stat.second;
if (std::next(write_stall_stats_map_iter) == write_stall_stats_map.end()) {
str << "\n";
} else {
str << ", ";
}
}
DumpWriteStalls(str, write_stall_stats_map);
*value = str.str();
}

Expand Down Expand Up @@ -1935,19 +1956,7 @@ void InternalStats::DumpCFStatsWriteStall(std::string* value,
DumpCFMapStatsWriteStall(&write_stall_stats_map);

std::ostringstream str;
str << "Write Stall (count): ";

for (auto write_stall_stats_map_iter = write_stall_stats_map.begin();
write_stall_stats_map_iter != write_stall_stats_map.end();
write_stall_stats_map_iter++) {
const auto& name_and_stat = *write_stall_stats_map_iter;
str << name_and_stat.first << ": " << name_and_stat.second;
if (std::next(write_stall_stats_map_iter) == write_stall_stats_map.end()) {
str << "\n";
} else {
str << ", ";
}
}
DumpWriteStalls(str, write_stall_stats_map);

if (total_stall_count) {
*total_stall_count =
Expand Down
1 change: 1 addition & 0 deletions db/memtable.cc
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ ImmutableMemTableOptions::ImmutableMemTableOptions(
mutable_cf_options.memtable_prefix_bloom_size_ratio) *
8u),
memtable_huge_page_size(mutable_cf_options.memtable_huge_page_size),
allow_merge_memtables(mutable_cf_options.allow_merge_memtables),
memtable_whole_key_filtering(
mutable_cf_options.memtable_whole_key_filtering),
inplace_update_support(ioptions.inplace_update_support),
Expand Down
1 change: 1 addition & 0 deletions db/memtable.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ struct ImmutableMemTableOptions {
size_t arena_block_size;
uint32_t memtable_prefix_bloom_bits;
size_t memtable_huge_page_size;
bool allow_merge_memtables;
bool memtable_whole_key_filtering;
bool inplace_update_support;
size_t inplace_update_num_locks;
Expand Down
6 changes: 6 additions & 0 deletions db/memtable_list.cc
Original file line number Diff line number Diff line change
Expand Up @@ -419,6 +419,12 @@ void MemTableList::PickMemtablesToFlush(uint64_t max_memtable_id,
std::max(m->GetNextLogNumber(), *max_next_log_number);
}
ret->push_back(m);
if (!m->GetImmutableMemTableOptions()->allow_merge_memtables) {
break;
}
if (m->SupportConvertToSST()) {
break;
}
} else if (!ret->empty()) {
// This `break` is necessary to prevent picking non-consecutive memtables
// in case `memlist` has one or more entries with
Expand Down
50 changes: 42 additions & 8 deletions db/output_validator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,18 @@
#include "test_util/sync_point.h"
#include "util/hash.h"
#include <terark/fstring.hpp>
#include <terark/io/var_int.hpp>

namespace ROCKSDB_NAMESPACE {

using terark::fstring;
static bool g_full_check = terark::getEnvBool("OutputValidator_full_check");

void OutputValidator::Init() {
full_check_ = g_full_check;
if (full_check_) {
kv_vec_.reserve(32 << 20); // 32M
}
if (icmp_.IsForwardBytewise())
m_add = &OutputValidator::Add_tpl<BytewiseCompareInternalKey>;
else if (icmp_.IsReverseBytewise())
Expand Down Expand Up @@ -48,21 +54,49 @@ Status OutputValidator::Add_tpl(const Slice key, const Slice value) {
memcpy(prev_key_.data(), key.data(), key.size());
#endif
}
if (g_full_check) {
kv_vec_.emplace_back(key.ToString(), value.ToString());
if (full_check_) {
auto WriteSlice = [this](Slice s) {
unsigned char buf[16];
size_t len = terark::save_var_uint64(buf, s.size_) - buf;
kv_vec_.append(buf, len);
kv_vec_.append(s.data_, s.size_);
};
WriteSlice(key);
WriteSlice(value);
}
num_kv_++;
return Status::OK();
}

static Slice ReadSlice(const unsigned char** ptr) {
size_t len = (size_t)terark::load_var_uint64(*ptr, ptr);
auto data = (const char*)(*ptr);
*ptr += len;
return Slice(data, len);
}

bool OutputValidator::CompareValidator(const OutputValidator& other) {
if (g_full_check) {
if (full_check_) {
long long file_number = m_file_number ? m_file_number : other.m_file_number;
ROCKSDB_VERIFY_EQ(kv_vec_.size(), other.kv_vec_.size());
for (size_t i = 0, n = kv_vec_.size(); i < n; i++) {
#define hex(deref, field) ParsedInternalKey(deref kv_vec_[i].field).DebugString(true, true).c_str()
ROCKSDB_VERIFY_F(kv_vec_[i].first == other.kv_vec_[i].first , "%06lld.sst[%zd]: %s %s", file_number, i, hex(,first ), hex(other., first ));
ROCKSDB_VERIFY_F(kv_vec_[i].second == other.kv_vec_[i].second, "%06lld.sst[%zd]: %s %s", file_number, i, hex(,second), hex(other., second));
if (kv_vec_.size() != other.kv_vec_.size()) {
fprintf(stderr,
"FATAL: OutputValidator::CompareValidator: kv_vec_.size: %zd != %zd\n",
kv_vec_.size(), other.kv_vec_.size());
}
ROCKSDB_VERIFY_EQ(num_kv_, other.num_kv_);
const unsigned char* x_reader = kv_vec_.begin();
const unsigned char* y_reader = other.kv_vec_.begin();
for (size_t i = 0, n = num_kv_; i < n; i++) {
Slice kx = ReadSlice(&x_reader);
Slice vx = ReadSlice(&x_reader);
Slice ky = ReadSlice(&y_reader);
Slice vy = ReadSlice(&y_reader);
#define HexKey(key) ParsedInternalKey(key).DebugString(true, true).c_str()
ROCKSDB_VERIFY_F(kx == ky, "%06lld.sst[%zd]: %zd(%s) %zd(%s)", file_number, i, kx.size_, HexKey(kx), ky.size_, HexKey(ky));
ROCKSDB_VERIFY_F(vx == vy, "%06lld.sst[%zd]: %zd(%s) %zd(%s)", file_number, i, vx.size_, vx.hex().c_str(), vy.size_, vy.hex().c_str());
}
ROCKSDB_VERIFY_EQ(x_reader, kv_vec_.end());
ROCKSDB_VERIFY_EQ(y_reader, other.kv_vec_.end());
ROCKSDB_VERIFY_EQ(GetHash(), other.GetHash());
}
return GetHash() == other.GetHash();
Expand Down
5 changes: 4 additions & 1 deletion db/output_validator.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "db/dbformat.h"
#include "rocksdb/slice.h"
#include "rocksdb/status.h"
#include <terark/valvec.hpp>
#include <terark/valvec32.hpp>

namespace ROCKSDB_NAMESPACE {
Expand Down Expand Up @@ -53,6 +54,8 @@ class OutputValidator {
uint64_t paranoid_hash_ = 0;
bool enable_order_check_;
bool enable_hash_;
std::vector<std::pair<std::string, std::string> > kv_vec_;
bool full_check_ = false;
size_t num_kv_ = 0;
terark::valvec<unsigned char> kv_vec_;
};
} // namespace ROCKSDB_NAMESPACE
1 change: 1 addition & 0 deletions db_stress_tool/db_stress_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ DECLARE_int32(max_write_buffer_number_to_maintain);
DECLARE_int64(max_write_buffer_size_to_maintain);
DECLARE_bool(use_write_buffer_manager);
DECLARE_double(memtable_prefix_bloom_size_ratio);
DECLARE_bool(allow_merge_memtables);
DECLARE_bool(memtable_whole_key_filtering);
DECLARE_int32(open_files);
DECLARE_uint64(compressed_secondary_cache_size);
Expand Down
4 changes: 4 additions & 0 deletions db_stress_tool/db_stress_gflags.cc
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,10 @@ DEFINE_double(memtable_prefix_bloom_size_ratio,
"creates prefix blooms for memtables, each with size "
"`write_buffer_size * memtable_prefix_bloom_size_ratio`.");

DEFINE_bool(allow_merge_memtables,
ROCKSDB_NAMESPACE::Options().allow_merge_memtables,
"allow merge memtables on flush.");

DEFINE_bool(memtable_whole_key_filtering,
ROCKSDB_NAMESPACE::Options().memtable_whole_key_filtering,
"Enable whole key filtering in memtables.");
Expand Down
3 changes: 3 additions & 0 deletions include/rocksdb/advanced_options.h
Original file line number Diff line number Diff line change
Expand Up @@ -479,6 +479,9 @@ struct AdvancedColumnFamilyOptions {
// Dynamically changeable through SetOptions() API
double memtable_prefix_bloom_size_ratio = 0.0;

// set as false to avoid flush output large SSTs
bool allow_merge_memtables = true;

// Enable whole key bloom filter in memtable. Note this will only take effect
// if memtable_prefix_bloom_size_ratio is not 0. Enabling whole key filtering
// can potentially reduce CPU usage for point-look-ups.
Expand Down
7 changes: 7 additions & 0 deletions options/cf_options.cc
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,10 @@ static std::unordered_map<std::string, OptionTypeInfo>
{"memtable_prefix_bloom_probes",
{0, OptionType::kUInt32T, OptionVerificationType::kDeprecated,
OptionTypeFlags::kMutable}},
{"allow_merge_memtables",
{offsetof(struct MutableCFOptions, allow_merge_memtables),
OptionType::kBoolean, OptionVerificationType::kNormal,
OptionTypeFlags::kMutable}},
{"memtable_whole_key_filtering",
{offsetof(struct MutableCFOptions, memtable_whole_key_filtering),
OptionType::kBoolean, OptionVerificationType::kNormal,
Expand Down Expand Up @@ -1058,6 +1062,9 @@ void MutableCFOptions::Dump(Logger* log) const {
arena_block_size);
ROCKS_LOG_INFO(log, " memtable_prefix_bloom_ratio: %f",
memtable_prefix_bloom_size_ratio);

ROCKS_LOG_INFO(log, " allow_merge_memtables: %d",
allow_merge_memtables);
ROCKS_LOG_INFO(log, " memtable_whole_key_filtering: %d",
memtable_whole_key_filtering);
ROCKS_LOG_INFO(log,
Expand Down
3 changes: 3 additions & 0 deletions options/cf_options.h
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ struct MutableCFOptions {
arena_block_size(options.arena_block_size),
memtable_prefix_bloom_size_ratio(
options.memtable_prefix_bloom_size_ratio),
allow_merge_memtables(options.allow_merge_memtables),
memtable_whole_key_filtering(options.memtable_whole_key_filtering),
memtable_huge_page_size(options.memtable_huge_page_size),
max_successive_merges(options.max_successive_merges),
Expand Down Expand Up @@ -193,6 +194,7 @@ struct MutableCFOptions {
max_write_buffer_number(0),
arena_block_size(0),
memtable_prefix_bloom_size_ratio(0),
allow_merge_memtables(true),
memtable_whole_key_filtering(false),
memtable_huge_page_size(0),
max_successive_merges(0),
Expand Down Expand Up @@ -261,6 +263,7 @@ struct MutableCFOptions {
int max_write_buffer_number;
size_t arena_block_size;
double memtable_prefix_bloom_size_ratio;
bool allow_merge_memtables;
bool memtable_whole_key_filtering;
size_t memtable_huge_page_size;
size_t max_successive_merges;
Expand Down
4 changes: 4 additions & 0 deletions options/options.cc
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ AdvancedColumnFamilyOptions::AdvancedColumnFamilyOptions(const Options& options)
inplace_callback(options.inplace_callback),
memtable_prefix_bloom_size_ratio(
options.memtable_prefix_bloom_size_ratio),
allow_merge_memtables(options.allow_merge_memtables),
memtable_whole_key_filtering(options.memtable_whole_key_filtering),
memtable_huge_page_size(options.memtable_huge_page_size),
memtable_insert_with_hint_prefix_extractor(
Expand Down Expand Up @@ -397,6 +398,9 @@ void ColumnFamilyOptions::Dump(Logger* log) const {
ROCKS_LOG_HEADER(
log, " Options.memtable_prefix_bloom_size_ratio: %f",
memtable_prefix_bloom_size_ratio);
ROCKS_LOG_HEADER(log,
" Options.allow_merge_memtables: %d",
allow_merge_memtables);
ROCKS_LOG_HEADER(log,
" Options.memtable_whole_key_filtering: %d",
memtable_whole_key_filtering);
Expand Down
1 change: 1 addition & 0 deletions options/options_helper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,7 @@ void UpdateColumnFamilyOptions(const MutableCFOptions& moptions,
cf_opts->arena_block_size = moptions.arena_block_size;
cf_opts->memtable_prefix_bloom_size_ratio =
moptions.memtable_prefix_bloom_size_ratio;
cf_opts->allow_merge_memtables = moptions.allow_merge_memtables;
cf_opts->memtable_whole_key_filtering = moptions.memtable_whole_key_filtering;
cf_opts->memtable_huge_page_size = moptions.memtable_huge_page_size;
cf_opts->max_successive_merges = moptions.max_successive_merges;
Expand Down
1 change: 1 addition & 0 deletions options/options_settable_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -536,6 +536,7 @@ TEST_F(OptionsSettableTest, ColumnFamilyOptionsAllFieldsSettable) {
"merge_operator=aabcxehazrMergeOperator;"
"memtable_prefix_bloom_size_ratio=0.4642;"
"memtable_whole_key_filtering=true;"
"allow_merge_memtables=true;"
"memtable_insert_with_hint_prefix_extractor=rocksdb.CappedPrefix.13;"
"check_flush_compaction_key_order=false;"
"paranoid_file_checks=true;"
Expand Down
2 changes: 1 addition & 1 deletion sideplugin/rockside
4 changes: 2 additions & 2 deletions table/iterator_wrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,9 @@ class IteratorWrapperBase {
// Iterator interface methods
bool Valid() const {
#ifdef ROCKSDB_ASSERT_STATUS_CHECKED
status_checked_after_invalid_ = valid_;
status_checked_after_invalid_ = result.is_valid;
#endif
return result_.valid_;
return result_.is_valid;
}
Slice key() const {
assert(Valid());
Expand Down
3 changes: 3 additions & 0 deletions tools/db_bench_tool.cc
Original file line number Diff line number Diff line change
Expand Up @@ -754,6 +754,8 @@ DEFINE_bool(use_ribbon_filter, false, "Use Ribbon instead of Bloom filter");
DEFINE_double(memtable_bloom_size_ratio, 0,
"Ratio of memtable size used for bloom filter. 0 means no bloom "
"filter.");
DEFINE_bool(allow_merge_memtables, true,
"allow merge memtables on flush.");
DEFINE_bool(memtable_whole_key_filtering, false,
"Try to use whole key bloom filter in memtables.");
DEFINE_bool(memtable_use_huge_page, false,
Expand Down Expand Up @@ -4286,6 +4288,7 @@ class Benchmark {
}
options.memtable_huge_page_size = FLAGS_memtable_use_huge_page ? 2048 : 0;
options.memtable_prefix_bloom_size_ratio = FLAGS_memtable_bloom_size_ratio;
options.allow_merge_memtables = FLAGS_allow_merge_memtables;
options.memtable_whole_key_filtering = FLAGS_memtable_whole_key_filtering;
if (FLAGS_memtable_insert_with_hint_prefix_size > 0) {
options.memtable_insert_with_hint_prefix_extractor.reset(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
* Fix a bug where if there is an error reading from offset 0 of a file from L1+ and that the file is not the first file in the sorted run, data can be lost in compaction and read/scan can return incorrect results.
1 change: 1 addition & 0 deletions unreleased_history/bug_fixes/fsbuffer_bug_fix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix use_after_free bug in async_io MultiReads when underlying FS enabled kFSBuffer. kFSBuffer is when underlying FS pass their own buffer instead of using RocksDB scratch in FSReadRequest. Right now it's an experimental feature.

0 comments on commit 98320a2

Please sign in to comment.