diff --git a/db/external_sst_file_basic_test.cc b/db/external_sst_file_basic_test.cc index 749a172ac..41ac48a07 100644 --- a/db/external_sst_file_basic_test.cc +++ b/db/external_sst_file_basic_test.cc @@ -9,6 +9,8 @@ #include "db/version_edit.h" #include "port/port.h" #include "port/stack_trace.h" +#include "rocksdb/advanced_options.h" +#include "rocksdb/options.h" #include "rocksdb/sst_file_writer.h" #include "test_util/testharness.h" #include "test_util/testutil.h" @@ -1292,6 +1294,80 @@ TEST_F(ExternalSSTFileBasicTest, VerifyChecksumReadahead) { Destroy(options); } +TEST_F(ExternalSSTFileBasicTest, ReadOldValueOfIngestedKeyBug) { + Options options = CurrentOptions(); + options.compaction_style = kCompactionStyleUniversal; + options.disable_auto_compactions = true; + options.num_levels = 3; + options.preserve_internal_time_seconds = 36000; + DestroyAndReopen(options); + + // To create the following LSM tree to trigger the bug: + // L0 + // L1 with seqno [1, 2] + // L2 with seqno [3, 4] + + // To create L1 shape + ASSERT_OK( + db_->Put(WriteOptions(), db_->DefaultColumnFamily(), "k1", "seqno1")); + ASSERT_OK(db_->Flush(FlushOptions())); + ASSERT_OK( + db_->Put(WriteOptions(), db_->DefaultColumnFamily(), "k1", "seqno2")); + ASSERT_OK(db_->Flush(FlushOptions())); + ColumnFamilyMetaData meta_1; + db_->GetColumnFamilyMetaData(&meta_1); + auto& files_1 = meta_1.levels[0].files; + ASSERT_EQ(files_1.size(), 2); + std::string file1 = files_1[0].db_path + files_1[0].name; + std::string file2 = files_1[1].db_path + files_1[1].name; + ASSERT_OK(db_->CompactFiles(CompactionOptions(), {file1, file2}, 1)); + // To confirm L1 shape + ColumnFamilyMetaData meta_2; + db_->GetColumnFamilyMetaData(&meta_2); + ASSERT_EQ(meta_2.levels[0].files.size(), 0); + ASSERT_EQ(meta_2.levels[1].files.size(), 1); + // Seqno starts from non-zero due to seqno reservation for + // preserve_internal_time_seconds greater than 0; + ASSERT_EQ(meta_2.levels[1].files[0].largest_seqno, 102); + ASSERT_EQ(meta_2.levels[2].files.size(), 0); + // To create L2 shape + ASSERT_OK(db_->Put(WriteOptions(), db_->DefaultColumnFamily(), "k2overlap", + "old_value")); + ASSERT_OK(db_->Flush(FlushOptions())); + ASSERT_OK(db_->Put(WriteOptions(), db_->DefaultColumnFamily(), "k2overlap", + "old_value")); + ASSERT_OK(db_->Flush(FlushOptions())); + ColumnFamilyMetaData meta_3; + db_->GetColumnFamilyMetaData(&meta_3); + auto& files_3 = meta_3.levels[0].files; + std::string file3 = files_3[0].db_path + files_3[0].name; + std::string file4 = files_3[1].db_path + files_3[1].name; + ASSERT_OK(db_->CompactFiles(CompactionOptions(), {file3, file4}, 2)); + // To confirm L2 shape + ColumnFamilyMetaData meta_4; + db_->GetColumnFamilyMetaData(&meta_4); + ASSERT_EQ(meta_4.levels[0].files.size(), 0); + ASSERT_EQ(meta_4.levels[1].files.size(), 1); + ASSERT_EQ(meta_4.levels[2].files.size(), 1); + ASSERT_EQ(meta_4.levels[2].files[0].largest_seqno, 104); + + // Ingest a file with new value of the key "k2overlap" + SstFileWriter sst_file_writer(EnvOptions(), options); + std::string f = sst_files_dir_ + "f.sst"; + ASSERT_OK(sst_file_writer.Open(f)); + ASSERT_OK(sst_file_writer.Put("k2overlap", "new_value")); + ExternalSstFileInfo f_info; + ASSERT_OK(sst_file_writer.Finish(&f_info)); + ASSERT_OK(db_->IngestExternalFile({f}, IngestExternalFileOptions())); + + // To verify new value of the key "k2overlap" is correctly returned + ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); + std::string value; + ASSERT_OK(db_->Get(ReadOptions(), "k2overlap", &value)); + // Before the fix, the value would be "old_value" and assertion failed + ASSERT_EQ(value, "new_value"); +} + TEST_F(ExternalSSTFileBasicTest, IngestRangeDeletionTombstoneWithGlobalSeqno) { for (int i = 5; i < 25; i++) { ASSERT_OK(db_->Put(WriteOptions(), db_->DefaultColumnFamily(), Key(i), diff --git a/db/external_sst_file_ingestion_job.cc b/db/external_sst_file_ingestion_job.cc index a4a194714..778e054c7 100644 --- a/db/external_sst_file_ingestion_job.cc +++ b/db/external_sst_file_ingestion_job.cc @@ -937,26 +937,6 @@ Status ExternalSstFileIngestionJob::AssignLevelAndSeqnoForIngestedFile( overlap_with_db = true; break; } - - if (compaction_style == kCompactionStyleUniversal && lvl != 0) { - const std::vector& level_files = - vstorage->LevelFiles(lvl); - const SequenceNumber level_largest_seqno = - (*std::max_element(level_files.begin(), level_files.end(), - [](FileMetaData* f1, FileMetaData* f2) { - return f1->fd.largest_seqno < - f2->fd.largest_seqno; - })) - ->fd.largest_seqno; - // should only assign seqno to current level's largest seqno when - // the file fits - if (level_largest_seqno != 0 && - IngestedFileFitInLevel(file_to_ingest, lvl)) { - *assigned_seqno = level_largest_seqno; - } else { - continue; - } - } } else if (compaction_style == kCompactionStyleUniversal) { continue; } diff --git a/unreleased_history/bug_fixes/new_ingested_data_with_old_seqno.md b/unreleased_history/bug_fixes/new_ingested_data_with_old_seqno.md new file mode 100644 index 000000000..8d0f32b4b --- /dev/null +++ b/unreleased_history/bug_fixes/new_ingested_data_with_old_seqno.md @@ -0,0 +1 @@ +Fix a bug where older data of an ingested key can be returned for read when universal compaction is used