Skip to content

Commit

Permalink
Add manifest file locking to prevent simultaneous DB instances or dbl…
Browse files Browse the repository at this point in the history
…ogutil modifications
  • Loading branch information
umegane committed Nov 18, 2024
1 parent 1455eae commit fa6a93e
Show file tree
Hide file tree
Showing 8 changed files with 113 additions and 4 deletions.
4 changes: 3 additions & 1 deletion include/limestone/api/datastore.h
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,6 @@ class datastore {
*/
void create_snapshot();

epoch_id_type last_durable_epoch_in_dir();

/**
* @brief requests the data store to rotate log files
Expand All @@ -346,6 +345,9 @@ class datastore {
int64_t current_unix_epoch_in_millis();

std::map<storage_id_type, write_version_type> clear_storage;

// File descriptor for file lock (flock) on the manifest file
int fd_for_flock_{-1};
};

} // namespace limestone::api
23 changes: 22 additions & 1 deletion src/limestone/datastore.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,19 @@ datastore::datastore(configuration const& conf) : location_(conf.data_locations_
}
}
internal::check_and_migrate_logdir_format(location_);

// acquire lock for manifest file
fd_for_flock_ = internal::acquire_manifest_lock(location_);
if (fd_for_flock_ == -1) {
if (errno == EWOULDBLOCK) {
std::string err_msg = "Another process is using the log directory: " + location_.string() + ". Terminate the conflicting process and restart this process.";
LOG_AND_THROW_IO_EXCEPTION(err_msg, errno);
} else {
std::string err_msg = "Failed to acquire lock for manifest in directory: " + location_.string();
LOG_AND_THROW_IO_EXCEPTION(err_msg, errno);
}
}

add_file(compaction_catalog_path);
compaction_catalog_ = std::make_unique<compaction_catalog>(compaction_catalog::from_catalog_file(location_));

Expand Down Expand Up @@ -257,7 +270,15 @@ std::future<void> datastore::shutdown() noexcept {
VLOG(log_info) << "/:limestone:datastore:shutdown compaction task has been stopped.";
}

return std::async(std::launch::async, []{
if (fd_for_flock_ != -1) {
if (::close(fd_for_flock_) == -1) {
VLOG(log_error) << "Failed to close lock file descriptor: " << strerror(errno);
} else {
fd_for_flock_ = -1;
}
}

return std::async(std::launch::async, [] {
std::this_thread::sleep_for(std::chrono::microseconds(100000));
VLOG(log_info) << "/:limestone:datastore:shutdown end";
});
Expand Down
19 changes: 19 additions & 0 deletions src/limestone/datastore_format.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <sys/file.h>
#include <fcntl.h>
#include <unistd.h>

#include <boost/filesystem.hpp>
#include <nlohmann/json.hpp>
Expand Down Expand Up @@ -170,4 +173,20 @@ void check_and_migrate_logdir_format(const boost::filesystem::path& logdir) {
}
}

int acquire_manifest_lock(const boost::filesystem::path& logdir) {
boost::filesystem::path manifest_path = logdir / std::string(manifest_file_name);

int fd = ::open(manifest_path.string().c_str(), O_RDWR); // NOLINT(cppcoreguidelines-pro-type-vararg)

Check warning on line 179 in src/limestone/datastore_format.cpp

View workflow job for this annotation

GitHub Actions / Clang-Tidy

hicpp-vararg

do not call c-style vararg functions
if (fd == -1) {
return -1;
}

if (::flock(fd, LOCK_EX | LOCK_NB) == -1) {
::close(fd);
return -1;
}
VLOG_LP(log_info) << "acquired lock on manifest file: " << manifest_path.string();
return fd;
}

} // namespace limestone::internal
8 changes: 8 additions & 0 deletions src/limestone/dblogutil/dblogutil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -289,11 +289,19 @@ int main(char *dir, subcommand mode) { // NOLINT
}
try {
check_and_migrate_logdir_format(p);
int lock_fd = acquire_manifest_lock(p);
if (lock_fd == -1) {
LOG(ERROR) << "Another process is using the log directory: " << p
<< ". Terminate the conflicting process and re-execute the command. "
<< "Error: " << strerror(errno);
log_and_exit(64);
}
dblog_scan ds(p);
ds.set_thread_num(FLAGS_thread_num);
if (mode == cmd_inspect) inspect(ds, opt_epoch);
if (mode == cmd_repair) repair(ds, opt_epoch);
if (mode == cmd_compaction) compaction(ds, opt_epoch);
close(lock_fd);
} catch (limestone_exception& e) {
LOG(ERROR) << e.what();
log_and_exit(64);
Expand Down
7 changes: 7 additions & 0 deletions src/limestone/internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,15 @@ void setup_initial_logdir(const boost::filesystem::path& logdir);
*/
int is_supported_version(const boost::filesystem::path& manifest_path, std::string& errmsg);

// Validates the manifest file in the specified log directory and performs repair or migration if necessary.
void check_and_migrate_logdir_format(const boost::filesystem::path& logdir);

// Acquires an exclusive lock on the manifest file.
// Returns the file descriptor on success, or -1 on failure.
// Note: This function does not log errors or handle them internally.
// The caller must check the return value and use errno for error handling.
int acquire_manifest_lock(const boost::filesystem::path& logdir);

// from datastore_restore.cpp

status purge_dir(const boost::filesystem::path& dir);
Expand Down
24 changes: 23 additions & 1 deletion test/limestone/api/datastore_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -133,10 +133,32 @@ TEST_F(datastore_test, add_persistent_callback_test) { // NOLINT

}

TEST_F(datastore_test, prevent_double_start_test) { // NOLINT
if (system("rm -rf /tmp/datastore_test") != 0) {
std::cerr << "cannot remove directory" << std::endl;
}
if (system("mkdir -p /tmp/datastore_test/data_location /tmp/datastore_test/metadata_location") != 0) {
std::cerr << "cannot make directory" << std::endl;
}

std::vector<boost::filesystem::path> data_locations{};
data_locations.emplace_back(data_location);
boost::filesystem::path metadata_location_path{metadata_location};
limestone::api::configuration conf(data_locations, metadata_location_path);

auto ds1 = std::make_unique<limestone::api::datastore_test>(conf);
ds1->ready();

// another process is using the log directory
ASSERT_DEATH({
auto ds2 = std::make_unique<limestone::api::datastore_test>(conf);
}, "Another process is using the log directory");


// Ather datastore is created after the first one is destroyed
ds1->shutdown();
auto ds3 = std::make_unique<limestone::api::datastore_test>(conf);
ds3->ready();
ds3->shutdown();
}

} // namespace limestone::testing
2 changes: 2 additions & 0 deletions test/limestone/log/rotate_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,7 @@ TEST_F(rotate_test, inactive_files_are_also_backed_up) { // NOLINT
channel1_1.add_entry(2, "k1", "v1", {42, 4});
channel1_1.end_session();
datastore_->switch_epoch(43);
datastore_->shutdown();
}
regen_datastore();
{
Expand All @@ -273,6 +274,7 @@ TEST_F(rotate_test, inactive_files_are_also_backed_up) { // NOLINT
channel2_0.add_entry(2, "k3", "v3", {44, 4});
channel2_0.end_session();
datastore_->switch_epoch(45);
datastore_->shutdown();
}

// setup done
Expand Down
30 changes: 29 additions & 1 deletion test/limestone/utils/dblogutil_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ int invoke(const std::string& command, std::string& out) {
class dblogutil_test : public ::testing::Test {
public:
static constexpr const char* location = "/tmp/dblogutil_test";
static constexpr const char* metadata_location = "/tmp/dblogutil_test/metadata";

void SetUp() {
boost::filesystem::remove_all(location);
Expand Down Expand Up @@ -565,4 +566,31 @@ TEST_F(dblogutil_test, invalid_epoch_option3) {
EXPECT_TRUE(contains(out, "invalid"));
}

} // namespace limestone::testing
TEST_F(dblogutil_test, execution_fails_while_active_datastore) {
// Inactive datastore
auto [rc, out] = inspect("pwal_0000", data_normal);
EXPECT_EQ(rc, 0);
EXPECT_NE(out.find("\n" "status: OK"), out.npos);

// Activate datastore
std::vector<boost::filesystem::path> data_locations{};
data_locations.emplace_back(location);
boost::filesystem::path metadata_location_path{metadata_location};
limestone::api::configuration conf(data_locations, metadata_location_path);
auto ds1 = std::make_unique<limestone::api::datastore_test>(conf);
ds1->ready();

// Attempt to run inspect while datastore is active
auto [rc_active, out_active] = inspect("pwal_0000", data_normal);
EXPECT_NE(rc_active, 0);
EXPECT_TRUE(contains(out_active, "Another process is using the log directory:"));

// Inactive datastore
ds1->shutdown();
ds1 = nullptr;
auto [rc_inactive, out_inacive] = inspect("pwal_0000", data_normal);
EXPECT_EQ(rc_inactive, 0);
EXPECT_NE(out_inacive.find("\n" "status: OK"), out.npos);
}

} // namespace limestone::testing

0 comments on commit fa6a93e

Please sign in to comment.