From 1ff58bc53f445d3f01a1ddaab5f4a92f5b337ea9 Mon Sep 17 00:00:00 2001
From: zwang28 <70626450+zwang28@users.noreply.github.com>
Date: Tue, 26 Sep 2023 16:52:36 +0800
Subject: [PATCH] refactor(backup): refine error message (#12388)
---
proto/backup_service.proto | 6 +--
src/ctl/src/cmd_impl/meta/backup_meta.rs | 22 +++++++--
src/meta/src/backup_restore/backup_manager.rs | 48 +++++++++----------
src/meta/src/rpc/service/backup_service.rs | 5 +-
src/rpc_client/src/meta_client.rs | 4 +-
5 files changed, 47 insertions(+), 38 deletions(-)
diff --git a/proto/backup_service.proto b/proto/backup_service.proto
index 425d3abb24e2f..feca5f17b7dc3 100644
--- a/proto/backup_service.proto
+++ b/proto/backup_service.proto
@@ -13,11 +13,8 @@ enum BackupJobStatus {
UNSPECIFIED = 0;
RUNNING = 1;
SUCCEEDED = 2;
- // NOT_FOUND indicates one of these cases:
- // - Invalid job id.
- // - Job has failed.
- // - Job has succeeded, but its resulted backup has been deleted later.
NOT_FOUND = 3;
+ FAILED = 4;
}
message BackupMetaRequest {}
message BackupMetaResponse {
@@ -29,6 +26,7 @@ message GetBackupJobStatusRequest {
message GetBackupJobStatusResponse {
uint64 job_id = 1;
BackupJobStatus job_status = 2;
+ string message = 3;
}
message DeleteMetaSnapshotRequest {
repeated uint64 snapshot_ids = 1;
diff --git a/src/ctl/src/cmd_impl/meta/backup_meta.rs b/src/ctl/src/cmd_impl/meta/backup_meta.rs
index 77c7f0edb7ca2..3238e22b35050 100644
--- a/src/ctl/src/cmd_impl/meta/backup_meta.rs
+++ b/src/ctl/src/cmd_impl/meta/backup_meta.rs
@@ -22,21 +22,33 @@ pub async fn backup_meta(context: &CtlContext) -> anyhow::Result<()> {
let meta_client = context.meta_client().await?;
let job_id = meta_client.backup_meta().await?;
loop {
- let job_status = meta_client.get_backup_job_status(job_id).await?;
+ let (job_status, message) = meta_client.get_backup_job_status(job_id).await?;
match job_status {
BackupJobStatus::Running => {
- tracing::info!("backup job is still running: job {}", job_id);
+ tracing::info!("backup job is still running: job {}, {}", job_id, message);
tokio::time::sleep(Duration::from_secs(1)).await;
}
BackupJobStatus::Succeeded => {
+ tracing::info!("backup job succeeded: job {}, {}", job_id, message);
break;
}
- _ => {
- return Err(anyhow::anyhow!("backup job failed: job {}", job_id));
+ BackupJobStatus::NotFound => {
+ return Err(anyhow::anyhow!(
+ "backup job status not found: job {}, {}",
+ job_id,
+ message
+ ));
}
+ BackupJobStatus::Failed => {
+ return Err(anyhow::anyhow!(
+ "backup job failed: job {}, {}",
+ job_id,
+ message
+ ));
+ }
+ _ => unreachable!("unknown backup job status"),
}
}
- tracing::info!("backup job succeeded: job {}", job_id);
Ok(())
}
diff --git a/src/meta/src/backup_restore/backup_manager.rs b/src/meta/src/backup_restore/backup_manager.rs
index c280572c796d4..819ea02e36346 100644
--- a/src/meta/src/backup_restore/backup_manager.rs
+++ b/src/meta/src/backup_restore/backup_manager.rs
@@ -68,9 +68,11 @@ pub struct BackupManager {
hummock_manager: HummockManagerRef,
backup_store: ArcSwap<(BoxedMetaSnapshotStorage, StoreConfig)>,
/// Tracks the running backup job. Concurrent jobs is not supported.
- running_backup_job: tokio::sync::Mutex