Skip to content

Commit

Permalink
feat: implement handle upgrade region instruction (#3013)
Browse files Browse the repository at this point in the history
* feat: implement task tracker

* feat: implement handle upgrade region instruction

* refactor: remove redundant code

* chore: apply suggestions from CR

* chore: apply suggestions from CR

* refactor: refactor wait_for_replay_millis to wait_for_replay_timeout

* chore: apply suggestions from CR

* chore: apply suggestions from CR
  • Loading branch information
WenyXu authored Dec 28, 2023
1 parent bd0eed7 commit 485a91f
Show file tree
Hide file tree
Showing 13 changed files with 923 additions and 100 deletions.
6 changes: 4 additions & 2 deletions src/common/meta/src/instruction.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

use std::collections::HashMap;
use std::fmt::{Display, Formatter};
use std::time::Duration;

use serde::{Deserialize, Serialize};
use store_api::storage::{RegionId, RegionNumber};
Expand Down Expand Up @@ -140,11 +141,12 @@ pub struct UpgradeRegion {
pub region_id: RegionId,
/// The `last_entry_id` of old leader region.
pub last_entry_id: Option<u64>,
/// The second of waiting for a wal replay.
/// The timeout of waiting for a wal replay.
///
/// `None` stands for no wait,
/// it's helpful to verify whether the leader region is ready.
pub wait_for_replay_secs: Option<u64>,
#[serde(with = "humantime_serde")]
pub wait_for_replay_timeout: Option<Duration>,
}

#[derive(Debug, Clone, Serialize, Deserialize, Display)]
Expand Down
20 changes: 19 additions & 1 deletion src/datanode/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
// limitations under the License.

use std::any::Any;
use std::sync::Arc;

use common_error::ext::{BoxedError, ErrorExt};
use common_error::status_code::StatusCode;
Expand All @@ -27,6 +28,19 @@ use table::error::Error as TableError;
#[snafu(visibility(pub))]
#[stack_trace_debug]
pub enum Error {
#[snafu(display("Failed to execute async task"))]
AsyncTaskExecute {
location: Location,
source: Arc<Error>,
},

#[snafu(display("Failed to watch change"))]
WatchAsyncTaskChange {
location: Location,
#[snafu(source)]
error: tokio::sync::watch::error::RecvError,
},

#[snafu(display("Failed to handle heartbeat response"))]
HandleHeartbeatResponse {
location: Location,
Expand Down Expand Up @@ -292,7 +306,11 @@ impl ErrorExt for Error {
| MissingWalDirConfig { .. }
| MissingKvBackend { .. } => StatusCode::InvalidArguments,

PayloadNotExist { .. } | Unexpected { .. } => StatusCode::Unexpected,
PayloadNotExist { .. } | Unexpected { .. } | WatchAsyncTaskChange { .. } => {
StatusCode::Unexpected
}

AsyncTaskExecute { source, .. } => source.status_code(),

// TODO(yingwen): Further categorize http error.
ParseAddr { .. }
Expand Down
1 change: 1 addition & 0 deletions src/datanode/src/heartbeat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ use crate::metrics;
use crate::region_server::RegionServer;

pub(crate) mod handler;
pub(crate) mod task_tracker;

pub struct HeartbeatTask {
node_id: u64,
Expand Down
138 changes: 45 additions & 93 deletions src/datanode/src/heartbeat/handler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,130 +17,75 @@ use common_meta::error::{InvalidHeartbeatResponseSnafu, Result as MetaResult};
use common_meta::heartbeat::handler::{
HandleControl, HeartbeatResponseHandler, HeartbeatResponseHandlerContext,
};
use common_meta::instruction::{
DowngradeRegion, DowngradeRegionReply, Instruction, InstructionReply, OpenRegion, SimpleReply,
};
use common_meta::instruction::{Instruction, InstructionReply};
use common_meta::RegionIdent;
use common_telemetry::error;
use futures::future::BoxFuture;
use snafu::OptionExt;
use store_api::path_utils::region_dir;
use store_api::region_engine::SetReadonlyResponse;
use store_api::region_request::{RegionCloseRequest, RegionOpenRequest, RegionRequest};
use store_api::storage::RegionId;

use crate::error;
mod close_region;
mod downgrade_region;
mod open_region;
mod upgrade_region;

use super::task_tracker::TaskTracker;
use crate::region_server::RegionServer;

/// Handler for the region instructions carried by heartbeat responses:
/// [Instruction::OpenRegion], [Instruction::CloseRegion],
/// [Instruction::DowngradeRegion] and [Instruction::UpgradeRegion].
///
/// Both fields are cloned into a fresh [HandlerContext] for every instruction
/// that is handled.
#[derive(Clone)]
pub struct RegionHeartbeatResponseHandler {
/// Region server that the instruction handlers issue their requests to.
region_server: RegionServer,
/// Task tracker shared with every [HandlerContext].
// NOTE(review): presumably tracks in-flight region catch-up tasks started by
// upgrade-region instructions — confirm against `upgrade_region.rs`.
catchup_tasks: TaskTracker<()>,
}

/// Handler of the instruction.
pub type InstructionHandler =
Box<dyn FnOnce(RegionServer) -> BoxFuture<'static, InstructionReply> + Send>;
Box<dyn FnOnce(HandlerContext) -> BoxFuture<'static, InstructionReply> + Send>;

/// State handed to an [InstructionHandler] when it is invoked.
///
/// The `handle_*_instruction` methods take the context by value and move it
/// into the future they return.
#[derive(Clone)]
pub struct HandlerContext {
/// Region server used to execute region requests (e.g. close, set readonly).
region_server: RegionServer,
/// Task tracker cloned from the owning [RegionHeartbeatResponseHandler].
// NOTE(review): presumably used to track region catch-up tasks for
// upgrade-region instructions — confirm against `upgrade_region.rs`.
catchup_tasks: TaskTracker<()>,
}

impl HandlerContext {
fn region_ident_to_region_id(region_ident: &RegionIdent) -> RegionId {
RegionId::new(region_ident.table_id, region_ident.region_number)
}
}

impl RegionHeartbeatResponseHandler {
/// Returns the [RegionHeartbeatResponseHandler].
pub fn new(region_server: RegionServer) -> Self {
Self { region_server }
Self {
region_server,
catchup_tasks: TaskTracker::new(),
}
}

/// Builds the [InstructionHandler].
fn build_handler(instruction: Instruction) -> MetaResult<InstructionHandler> {
match instruction {
Instruction::OpenRegion(OpenRegion {
region_ident,
region_storage_path,
region_options,
region_wal_options,
skip_wal_replay,
}) => Ok(Box::new(move |region_server| {
Box::pin(async move {
let region_id = Self::region_ident_to_region_id(&region_ident);
// TODO(niebayes): extends region options with region_wal_options.
let _ = region_wal_options;
let request = RegionRequest::Open(RegionOpenRequest {
engine: region_ident.engine,
region_dir: region_dir(&region_storage_path, region_id),
options: region_options,
skip_wal_replay,
});
let result = region_server.handle_request(region_id, request).await;

let success = result.is_ok();
let error = result.as_ref().map_err(|e| e.to_string()).err();

InstructionReply::OpenRegion(SimpleReply {
result: success,
error,
})
})
Instruction::OpenRegion(open_region) => Ok(Box::new(move |handler_context| {
handler_context.handle_open_region_instruction(open_region)
})),
Instruction::CloseRegion(region_ident) => Ok(Box::new(|region_server| {
Box::pin(async move {
let region_id = Self::region_ident_to_region_id(&region_ident);
let request = RegionRequest::Close(RegionCloseRequest {});
let result = region_server.handle_request(region_id, request).await;

match result {
Ok(_) => InstructionReply::CloseRegion(SimpleReply {
result: true,
error: None,
}),
Err(error::Error::RegionNotFound { .. }) => {
InstructionReply::CloseRegion(SimpleReply {
result: true,
error: None,
})
}
Err(err) => InstructionReply::CloseRegion(SimpleReply {
result: false,
error: Some(err.to_string()),
}),
}
})
Instruction::CloseRegion(close_region) => Ok(Box::new(|handler_context| {
handler_context.handle_close_region_instruction(close_region)
})),
Instruction::DowngradeRegion(DowngradeRegion { region_id }) => {
Ok(Box::new(move |region_server| {
Box::pin(async move {
match region_server.set_readonly_gracefully(region_id).await {
Ok(SetReadonlyResponse::Success { last_entry_id }) => {
InstructionReply::DowngradeRegion(DowngradeRegionReply {
last_entry_id,
exists: true,
error: None,
})
}
Ok(SetReadonlyResponse::NotFound) => {
InstructionReply::DowngradeRegion(DowngradeRegionReply {
last_entry_id: None,
exists: false,
error: None,
})
}
Err(err) => InstructionReply::DowngradeRegion(DowngradeRegionReply {
last_entry_id: None,
exists: false,
error: Some(err.to_string()),
}),
}
})
Instruction::DowngradeRegion(downgrade_region) => {
Ok(Box::new(move |handler_context| {
handler_context.handle_downgrade_region_instruction(downgrade_region)
}))
}
Instruction::UpgradeRegion(_) => {
todo!()
}
Instruction::UpgradeRegion(upgrade_region) => Ok(Box::new(move |handler_context| {
handler_context.handle_upgrade_region_instruction(upgrade_region)
})),
Instruction::InvalidateTableIdCache(_) | Instruction::InvalidateTableNameCache(_) => {
InvalidHeartbeatResponseSnafu.fail()
}
}
}

fn region_ident_to_region_id(region_ident: &RegionIdent) -> RegionId {
RegionId::new(region_ident.table_id, region_ident.region_number)
}
}

#[async_trait]
Expand All @@ -162,9 +107,14 @@ impl HeartbeatResponseHandler for RegionHeartbeatResponseHandler {

let mailbox = ctx.mailbox.clone();
let region_server = self.region_server.clone();
let catchup_tasks = self.catchup_tasks.clone();
let handler = Self::build_handler(instruction)?;
let _handle = common_runtime::spawn_bg(async move {
let reply = handler(region_server).await;
let reply = handler(HandlerContext {
region_server,
catchup_tasks,
})
.await;

if let Err(e) = mailbox.send((meta, reply)).await {
error!(e; "Failed to send reply to mailbox");
Expand All @@ -184,10 +134,12 @@ mod tests {
use common_meta::heartbeat::mailbox::{
HeartbeatMailbox, IncomingMessage, MailboxRef, MessageMeta,
};
use common_meta::instruction::{DowngradeRegion, OpenRegion};
use mito2::config::MitoConfig;
use mito2::engine::MITO_ENGINE_NAME;
use mito2::test_util::{CreateRequestBuilder, TestEnv};
use store_api::region_request::RegionRequest;
use store_api::path_utils::region_dir;
use store_api::region_request::{RegionCloseRequest, RegionRequest};
use store_api::storage::RegionId;
use tokio::sync::mpsc::{self, Receiver};

Expand Down
54 changes: 54 additions & 0 deletions src/datanode/src/heartbeat/handler/close_region.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use common_error::ext::ErrorExt;
use common_meta::instruction::{InstructionReply, SimpleReply};
use common_meta::RegionIdent;
use common_telemetry::warn;
use futures_util::future::BoxFuture;
use store_api::region_request::{RegionCloseRequest, RegionRequest};

use crate::error;
use crate::heartbeat::handler::HandlerContext;

impl HandlerContext {
    /// Closes the region identified by `region_ident` and reports the outcome
    /// as an [InstructionReply::CloseRegion].
    ///
    /// A region that the region server does not know about is treated as
    /// already closed: a warning is logged and a successful reply is returned.
    /// Any other failure yields `result: false` together with the error's
    /// output message.
    pub(crate) fn handle_close_region_instruction(
        self,
        region_ident: RegionIdent,
    ) -> BoxFuture<'static, InstructionReply> {
        Box::pin(async move {
            let region_id = Self::region_ident_to_region_id(&region_ident);
            let outcome = self
                .region_server
                .handle_request(region_id, RegionRequest::Close(RegionCloseRequest {}))
                .await;

            // Reduce the outcome to (success flag, optional error message) so the
            // reply is assembled in exactly one place.
            let (succeeded, message) = match outcome {
                Ok(_) => (true, None),
                Err(error::Error::RegionNotFound { .. }) => {
                    warn!("Received a close region instruction from meta, but target region:{region_id} is not found.");
                    (true, None)
                }
                Err(err) => (false, Some(err.output_msg())),
            };

            InstructionReply::CloseRegion(SimpleReply {
                result: succeeded,
                error: message,
            })
        })
    }
}
51 changes: 51 additions & 0 deletions src/datanode/src/heartbeat/handler/downgrade_region.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use common_error::ext::ErrorExt;
use common_meta::instruction::{DowngradeRegion, DowngradeRegionReply, InstructionReply};
use futures_util::future::BoxFuture;
use store_api::region_engine::SetReadonlyResponse;

use crate::heartbeat::handler::HandlerContext;

impl HandlerContext {
    /// Asks the region server to set the region readonly gracefully and
    /// reports the outcome as an [InstructionReply::DowngradeRegion].
    ///
    /// * Success: `exists: true`, carrying the `last_entry_id` from the response.
    /// * Region not found: `exists: false`, no error.
    /// * Failure: `exists: true`, with the error's output message and no
    ///   `last_entry_id`.
    pub(crate) fn handle_downgrade_region_instruction(
        self,
        DowngradeRegion { region_id }: DowngradeRegion,
    ) -> BoxFuture<'static, InstructionReply> {
        Box::pin(async move {
            let response = self.region_server.set_readonly_gracefully(region_id).await;

            // Build the reply payload first; it is wrapped into the
            // instruction reply once, below.
            let reply = match response {
                Ok(SetReadonlyResponse::Success { last_entry_id }) => DowngradeRegionReply {
                    last_entry_id,
                    exists: true,
                    error: None,
                },
                Ok(SetReadonlyResponse::NotFound) => DowngradeRegionReply {
                    last_entry_id: None,
                    exists: false,
                    error: None,
                },
                Err(err) => DowngradeRegionReply {
                    last_entry_id: None,
                    exists: true,
                    error: Some(err.output_msg()),
                },
            };

            InstructionReply::DowngradeRegion(reply)
        })
    }
}
Loading

0 comments on commit 485a91f

Please sign in to comment.