Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(meta): meta server failed and could not be started normally while setting environment variables after dropping table #2148

Merged
merged 15 commits into from
Nov 25, 2024
1 change: 1 addition & 0 deletions src/meta/meta_service.h
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,7 @@ class meta_service : public serverlet<meta_service>
friend class meta_partition_guardian_test;
friend class meta_service_test;
friend class meta_service_test_app;
friend class server_state_test;
friend class meta_split_service_test;
friend class meta_test_base;
friend class policy_context_test;
Expand Down
89 changes: 73 additions & 16 deletions src/meta/server_state.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,15 @@
#include <boost/lexical_cast.hpp>
// IWYU pragma: no_include <ext/alloc_traits.h>
#include <fmt/core.h>
#include <string.h>
#include <algorithm>
#include <atomic>
#include <chrono>
#include <cstdint>
#include <cstring>
#include <set>
#include <sstream> // IWYU pragma: keep
#include <string>
#include <string_view>
#include <thread>
#include <unordered_map>

Expand Down Expand Up @@ -75,6 +76,7 @@
#include "utils/blob.h"
#include "utils/command_manager.h"
#include "utils/config_api.h"
#include "utils/fail_point.h"
#include "utils/flags.h"
#include "utils/fmt_logging.h"
#include "utils/metrics.h"
Expand Down Expand Up @@ -1004,7 +1006,7 @@ void server_state::query_configuration_by_index(const query_cfg_request &request
/*out*/ query_cfg_response &response)
{
zauto_read_lock l(_lock);
auto iter = _exist_apps.find(request.app_name.c_str());
auto iter = _exist_apps.find(request.app_name);
if (iter == _exist_apps.end()) {
response.err = ERR_OBJECT_NOT_FOUND;
return;
Expand Down Expand Up @@ -2990,12 +2992,13 @@ void server_state::set_app_envs(const app_env_rpc &env_rpc)
LOG_WARNING("set app envs failed with invalid request");
return;
}

const std::vector<std::string> &keys = request.keys;
const std::vector<std::string> &values = request.values;
const std::string &app_name = request.app_name;

std::ostringstream os;
for (int i = 0; i < keys.size(); i++) {
for (size_t i = 0; i < keys.size(); ++i) {
if (i != 0) {
os << ", ";
}
Expand All @@ -3012,6 +3015,7 @@ void server_state::set_app_envs(const app_env_rpc &env_rpc)

os << keys[i] << "=" << values[i];
}

LOG_INFO("set app envs for app({}) from remote({}): kvs = {}",
app_name,
env_rpc.remote_address(),
Expand All @@ -3020,31 +3024,84 @@ void server_state::set_app_envs(const app_env_rpc &env_rpc)
app_info ainfo;
std::string app_path;
{
FAIL_POINT_INJECT_NOT_RETURN_F("set_app_envs_failed", [app_name, this](std::string_view s) {
zauto_write_lock l(_lock);

if (s == "not_found") {
CHECK_EQ(_exist_apps.erase(app_name), 1);
return;
}

if (s == "dropping") {
gutil::FindOrDie(_exist_apps, app_name)->status = app_status::AS_DROPPING;
return;
}
});

zauto_read_lock l(_lock);
std::shared_ptr<app_state> app = get_app(app_name);
if (app == nullptr) {
LOG_WARNING("set app envs failed with invalid app_name({})", app_name);
env_rpc.response().err = ERR_INVALID_PARAMETERS;
env_rpc.response().hint_message = "invalid app name";

const auto &app = get_app(app_name);
if (!app) {
LOG_WARNING("set app envs failed since app_name({}) cannot be found", app_name);
env_rpc.response().err = ERR_APP_NOT_EXIST;
env_rpc.response().hint_message = "app cannot be found";
return;
}

if (app->status == app_status::AS_DROPPING) {
LOG_WARNING("set app envs failed since app(name={}, id={}) is being dropped",
app_name,
app->app_id);
env_rpc.response().err = ERR_BUSY_DROPPING;
env_rpc.response().hint_message = "app is being dropped";
return;
} else {
ainfo = *(reinterpret_cast<app_info *>(app.get()));
app_path = get_app_path(*app);
}

ainfo = *app;
app_path = get_app_path(*app);
}
for (int idx = 0; idx < keys.size(); idx++) {

for (size_t idx = 0; idx < keys.size(); ++idx) {
ainfo.envs[keys[idx]] = values[idx];
}

do_update_app_info(app_path, ainfo, [this, app_name, keys, values, env_rpc](error_code ec) {
CHECK_EQ_MSG(ec, ERR_OK, "update app info to remote storage failed");
CHECK_EQ_MSG(ec, ERR_OK, "update app({}) info to remote storage failed", app_name);

zauto_write_lock l(_lock);

FAIL_POINT_INJECT_NOT_RETURN_F("set_app_envs_failed", [app_name, this](std::string_view s) {
if (s == "dropped_after") {
CHECK_EQ(_exist_apps.erase(app_name), 1);
return;
}
});

std::shared_ptr<app_state> app = get_app(app_name);
std::string old_envs = dsn::utils::kv_map_to_string(app->envs, ',', '=');
for (int idx = 0; idx < keys.size(); idx++) {

// The table might be removed just before the callback function is invoked, thus we must
// check if this table still exists.
//
// TODO(wangdan): should make updates to remote storage sequential by supporting atomic
// set, otherwise update might be missing. For example, an update is setting the envs
// while another is dropping a table. The update setting the envs does not contain the
// dropped state. Once it is applied by remote storage after another update dropping
// the table, the state of the table would always be non-dropped on remote storage.
if (!app) {
LOG_ERROR("set app envs failed since app({}) has just been dropped", app_name);
env_rpc.response().err = ERR_APP_DROPPED;
env_rpc.response().hint_message = "app has just been dropped";
return;
}

const auto &old_envs = dsn::utils::kv_map_to_string(app->envs, ',', '=');

// Update envs of local memory.
for (size_t idx = 0; idx < keys.size(); ++idx) {
app->envs[keys[idx]] = values[idx];
}
std::string new_envs = dsn::utils::kv_map_to_string(app->envs, ',', '=');

const auto &new_envs = dsn::utils::kv_map_to_string(app->envs, ',', '=');
LOG_INFO("app envs changed: old_envs = {}, new_envs = {}", old_envs, new_envs);
});
}
Expand Down
21 changes: 10 additions & 11 deletions src/meta/server_state.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
// IWYU pragma: no_include <boost/detail/basic_pointerbuf.hpp>
#include <boost/lexical_cast.hpp>
#include <gtest/gtest_prod.h>
#include <stdint.h>
#include <cstdint>
#include <functional>
#include <map>
#include <memory>
Expand All @@ -42,6 +42,7 @@
#include "common/gpid.h"
#include "common/manual_compact.h"
#include "dsn.layer2_types.h"
#include "gutil/map_util.h"
#include "meta/meta_rpc_types.h"
#include "meta_data.h"
#include "table_metrics.h"
Expand Down Expand Up @@ -140,20 +141,17 @@ class server_state

void lock_read(zauto_read_lock &other);
void lock_write(zauto_write_lock &other);
const meta_view get_meta_view() { return {&_all_apps, &_nodes}; }
std::shared_ptr<app_state> get_app(const std::string &name) const

meta_view get_meta_view() { return {&_all_apps, &_nodes}; }

std::shared_ptr<app_state> get_app(const std::string &app_name) const
{
auto iter = _exist_apps.find(name);
if (iter == _exist_apps.end())
return nullptr;
return iter->second;
return gutil::FindWithDefault(_exist_apps, app_name);
}

std::shared_ptr<app_state> get_app(int32_t app_id) const
{
auto iter = _all_apps.find(app_id);
if (iter == _all_apps.end())
return nullptr;
return iter->second;
return gutil::FindWithDefault(_all_apps, app_id);
}

void query_configuration_by_index(const query_cfg_request &request,
Expand Down Expand Up @@ -409,6 +407,7 @@ class server_state
friend class meta_split_service;
friend class meta_split_service_test;
friend class meta_service_test_app;
friend class server_state_test;
friend class meta_test_base;
friend class test::test_checker;
friend class server_state_restore_test;
Expand Down
2 changes: 1 addition & 1 deletion src/meta/test/meta_service_test_app.h
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ class meta_service_test_app : public dsn::service_app
void json_compacity();

// test server_state set_app_envs/del_app_envs/clear_app_envs
void app_envs_basic_test();
static void app_envs_basic_test();

// test for bug found
void adjust_dropped_size();
Expand Down
Loading
Loading