Skip to content

Commit

Permalink
fix(storage): suppress some hummock false panic and warn log (#17914)
Browse files Browse the repository at this point in the history
  • Loading branch information
wenym1 authored Aug 9, 2024
1 parent efdbf3c commit 13af20c
Show file tree
Hide file tree
Showing 5 changed files with 54 additions and 29 deletions.
36 changes: 23 additions & 13 deletions src/meta/src/hummock/manager/commit_epoch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ use risingwave_hummock_sdk::{
use risingwave_pb::hummock::compact_task::{self};
use risingwave_pb::hummock::HummockSnapshot;
use sea_orm::TransactionTrait;
use tracing::warn;

use crate::hummock::error::{Error, Result};
use crate::hummock::manager::compaction_group_manager::CompactionGroupManager;
Expand Down Expand Up @@ -269,19 +270,28 @@ impl HummockManager {

// TODO: remove the sanity check when supporting partial checkpoint
assert_eq!(1, table_committed_epoch.len());
assert_eq!(
table_committed_epoch.iter().next().expect("non-empty"),
(
&epoch,
&version
.latest_version()
.state_table_info
.info()
.keys()
.cloned()
.collect()
)
);
{
let (table_committed_epoch, committed_table_ids) =
table_committed_epoch.iter().next().expect("non-empty");
assert_eq!(*table_committed_epoch, epoch);
let table_ids: HashSet<_> = version
.latest_version()
.state_table_info
.info()
.keys()
.cloned()
.collect();
assert!(table_ids.is_subset(committed_table_ids), "hummock table ids {table_ids:?} not a subset of table ids to commit{committed_table_ids:?}");
if cfg!(debug_assertions) {
assert_eq!(&table_ids, committed_table_ids);
} else if table_ids != *committed_table_ids {
let extra_table_ids = committed_table_ids - &table_ids;
warn!(
?extra_table_ids,
"ignore extra table ids that are not previously registered"
);
}
}

// Apply stats changes.
let mut version_stats = HummockVersionStatsTransaction::new(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -358,10 +358,6 @@ impl HummockVersion {
new_sst_start_id: u64,
) {
let mut new_sst_id = new_sst_start_id;
if !self.levels.contains_key(&parent_group_id) {
warn!(parent_group_id, "non-existing parent group id to init from");
return;
}
if parent_group_id == StaticCompactionGroupId::NewCompactionGroup as CompactionGroupId {
if new_sst_start_id != 0 {
if cfg!(debug_assertions) {
Expand All @@ -378,6 +374,10 @@ impl HummockVersion {
}
return;
}
if !self.levels.contains_key(&parent_group_id) {
warn!(parent_group_id, "non-existing parent group id to init from");
return;
}
let [parent_levels, cur_levels] = self
.levels
.get_many_mut([&parent_group_id, &group_id])
Expand Down
16 changes: 12 additions & 4 deletions src/storage/src/hummock/event_handler/hummock_event_handler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -427,10 +427,18 @@ impl HummockEventHandler {
}
}
if !pending.is_empty() {
warn!(
pending_count = pending.len(),
total_count, "cannot acquire lock for all read version"
);
if pending.len() * 10 > total_count {
// Only print warn log when failed to acquire more than 10%
warn!(
pending_count = pending.len(),
total_count, "cannot acquire lock for all read version"
);
} else {
debug!(
pending_count = pending.len(),
total_count, "cannot acquire lock for all read version"
);
}
}

const TRY_LOCK_TIMEOUT: Duration = Duration::from_millis(1);
Expand Down
2 changes: 1 addition & 1 deletion src/storage/src/hummock/store/hummock_storage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -458,7 +458,7 @@ impl HummockStorage {
v.vnodes().iter_ones().collect_vec()
})
.collect_vec();
panic!("There are {} read version associated with vnode {}. read_version_vnodes={:?}", read_version_vnodes.len(), vnode.to_index(), read_version_vnodes);
return Err(HummockError::other(format!("There are {} read version associated with vnode {}. read_version_vnodes={:?}", read_version_vnodes.len(), vnode.to_index(), read_version_vnodes)).into());
}
read_filter_for_version(
epoch,
Expand Down
21 changes: 14 additions & 7 deletions src/storage/src/hummock/store/local_hummock_storage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,8 @@ use crate::hummock::utils::{
};
use crate::hummock::write_limiter::WriteLimiterRef;
use crate::hummock::{
BackwardSstableIterator, MemoryLimiter, SstableIterator, SstableIteratorReadOptions,
SstableStoreRef,
BackwardSstableIterator, HummockError, MemoryLimiter, SstableIterator,
SstableIteratorReadOptions, SstableStoreRef,
};
use crate::mem_table::{KeyOp, MemTable, MemTableHummockIterator, MemTableHummockRevIterator};
use crate::monitor::{HummockStateStoreMetrics, IterLocalMetricsGuard, StoreLocalStatistic};
Expand Down Expand Up @@ -495,7 +495,9 @@ impl LocalStateStore for LocalHummockStorage {
instance_id: self.instance_id(),
init_epoch: options.epoch.curr,
})
.expect("should succeed");
.map_err(|_| {
HummockError::other("failed to send InitEpoch. maybe shutting down")
})?;
}
Ok(())
}
Expand Down Expand Up @@ -528,14 +530,17 @@ impl LocalStateStore for LocalHummockStorage {
});
}
}
if !self.is_replicated {
self.event_sender
if !self.is_replicated
&& self
.event_sender
.send(HummockEvent::LocalSealEpoch {
instance_id: self.instance_id(),
next_epoch,
opts,
})
.expect("should be able to send");
.is_err()
{
warn!("failed to send LocalSealEpoch. maybe shutting down");
}
}

Expand Down Expand Up @@ -624,7 +629,9 @@ impl LocalHummockStorage {
if !self.is_replicated {
self.event_sender
.send(HummockEvent::ImmToUploader { instance_id, imm })
.unwrap();
.map_err(|_| {
HummockError::other("failed to send imm to uploader. maybe shutting down")
})?;
}
imm_size
} else {
Expand Down

0 comments on commit 13af20c

Please sign in to comment.