Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: implement dynamic pruning strategy #1295

Merged
merged 2 commits into from
May 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
160 changes: 6 additions & 154 deletions trin-storage/src/versioned/id_indexed_v1/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,12 @@ use ethportal_api::types::portal_wire::ProtocolId;
use r2d2::Pool;
use r2d2_sqlite::SqliteConnectionManager;

use crate::{
versioned::{usage_stats::UsageStats, ContentType},
DistanceFunction, PortalStorageConfig, BYTES_IN_MB_U64,
};
use crate::{versioned::ContentType, DistanceFunction, PortalStorageConfig, BYTES_IN_MB_U64};

/// The fraction of the storage capacity that we should aim for when pruning.
const TARGET_CAPACITY_FRACTION: f64 = 0.95;
use super::pruning_strategy::PruningConfig;

/// The config for the IdIndexedV1Store
#[derive(Debug, Clone)]
#[derive(Clone, Debug)]
pub struct IdIndexedV1StoreConfig {
pub content_type: ContentType,
pub network: ProtocolId,
Expand All @@ -23,6 +19,7 @@ pub struct IdIndexedV1StoreConfig {
pub storage_capacity_bytes: u64,
pub sql_connection_pool: Pool<SqliteConnectionManager>,
pub distance_fn: DistanceFunction,
pub pruning_config: PruningConfig,
}

impl IdIndexedV1StoreConfig {
Expand All @@ -39,153 +36,8 @@ impl IdIndexedV1StoreConfig {
storage_capacity_bytes: config.storage_capacity_mb * BYTES_IN_MB_U64,
sql_connection_pool: config.sql_connection_pool,
distance_fn: config.distance_fn,
// consider making this a parameter if we start using non-default value
pruning_config: PruningConfig::default(),
}
}

/// Returns the number of bytes the store should be reduced to when pruning.
///
/// This is `storage_capacity_bytes` scaled by `TARGET_CAPACITY_FRACTION`, rounded to the
/// nearest whole byte.
pub fn target_capacity_bytes(&self) -> u64 {
    let scaled_capacity = self.storage_capacity_bytes as f64 * TARGET_CAPACITY_FRACTION;
    scaled_capacity.round() as u64
}

/// Estimates how many entries have to be removed in order to get down to the target capacity.
///
/// Yields 0 when the current entry count does not exceed the estimated target count, and also
/// when no target count can be estimated from `usage_stats`.
pub fn estimate_to_delete_until_target(&self, usage_stats: &UsageStats) -> u64 {
    match self.estimated_target_capacity_count(usage_stats) {
        // `saturating_sub` clamps at 0 when we are already at or below the target count.
        Some(target_count) => usage_stats.entry_count.saturating_sub(target_count),
        None => 0,
    }
}

/// Estimates how many entries fit into the target capacity, using the average entry size
/// reported by `usage_stats`.
///
/// Returns `None` when `usage_stats` has no average entry size to report.
fn estimated_target_capacity_count(&self, usage_stats: &UsageStats) -> Option<u64> {
    let average_size = usage_stats.average_entry_size_bytes()?;
    let target_bytes = self.target_capacity_bytes() as f64;
    // Round down: a partially fitting entry does not count towards the target.
    Some((target_bytes / average_size).floor() as u64)
}
}

// Unit tests for the capacity/pruning estimates exposed by `IdIndexedV1StoreConfig`.
#[cfg(test)]
mod tests {
use std::path::PathBuf;

use discv5::enr::NodeId;
use ethportal_api::types::portal_wire::ProtocolId;
use r2d2::Pool;
use r2d2_sqlite::SqliteConnectionManager;
use rstest::rstest;

use crate::{versioned::ContentType, DistanceFunction};

use super::*;

// Capacity used by every test config; with TARGET_CAPACITY_FRACTION = 0.95 the target
// capacity works out to 950 bytes.
const STORAGE_CAPACITY_BYTES: u64 = 1000;

// Builds a State-network config with `STORAGE_CAPACITY_BYTES` of capacity, a random node id,
// an empty data dir path and an in-memory SQLite connection pool.
fn create_config() -> IdIndexedV1StoreConfig {
IdIndexedV1StoreConfig {
content_type: ContentType::State,
network: ProtocolId::State,
node_id: NodeId::random(),
node_data_dir: PathBuf::default(),
storage_capacity_bytes: STORAGE_CAPACITY_BYTES,
sql_connection_pool: Pool::new(SqliteConnectionManager::memory()).unwrap(),
distance_fn: DistanceFunction::Xor,
}
}

// Checks `UsageStats::is_above` against the config's target capacity.
// Case arguments: (entry_count, total_entry_size_bytes, expected).
// The cases expect exactly 950 bytes to NOT count as "above".
// NOTE(review): case names such as `target_capacity` (900 bytes) and `pruning` (950 bytes) do
// not line up with the 950-byte target — they look stale; confirm and rename if so.
#[rstest]
#[case::no_usage(0, 0, false)]
#[case::low_usage(10, 100, false)]
#[case::just_below_target_capacity(89, 890, false)]
#[case::target_capacity(90, 900, false)]
#[case::between_target_and_pruning(92, 920, false)]
#[case::pruning(95, 950, false)]
#[case::between_pruning_and_full(97, 970, true)]
#[case::full(100, 1000, true)]
#[case::above_full(110, 1100, true)]
fn is_above_target_capacity(
#[case] entry_count: u64,
#[case] total_entry_size_bytes: u64,
#[case] expected: bool,
) {
let config = create_config();
let usage_stats = UsageStats::new(entry_count, total_entry_size_bytes);

assert_eq!(
usage_stats.is_above(config.target_capacity_bytes()),
expected
);
}

// With default (empty) usage stats no estimate is expected (`None`).
#[test]
fn estimate_capacity_count_no_usage() {
let config = create_config();
let usage_stats = UsageStats::default();
assert_eq!(
config.estimated_target_capacity_count(&usage_stats),
None,
"testing estimated_target_capacity_count"
);
}

// Checks the estimated number of entries that fit into the target capacity.
// Case arguments: (entry_count, total_entry_size_bytes, estimated_target_capacity_count).
// Expected values equal floor(950 / (total_entry_size_bytes / entry_count)).
#[rstest]
#[case::low_usage_1(10, 100, 95)]
#[case::low_usage_2(20, 100, 190)]
#[case::low_usage_3(50, 100, 475)]
#[case::mid_usage_1(10, 500, 19)]
#[case::mid_usage_2(20, 500, 38)]
#[case::mid_usage_3(50, 500, 95)]
#[case::between_target_and_full_1(10, 970, 9)]
#[case::between_target_and_full_2(20, 970, 19)]
#[case::between_target_and_full_3(50, 970, 48)]
#[case::between_target_and_full_4(100, 970, 97)]
#[case::above_full_1(10, 1050, 9)]
#[case::above_full_2(20, 1050, 18)]
#[case::above_full_3(50, 1050, 45)]
fn estimate_target_capacity_count(
#[case] entry_count: u64,
#[case] total_entry_size_bytes: u64,
#[case] estimated_target_capacity_count: u64,
) {
let config = create_config();
let usage_stats = UsageStats::new(entry_count, total_entry_size_bytes);
assert_eq!(
config.estimated_target_capacity_count(&usage_stats),
Some(estimated_target_capacity_count),
"testing estimated_target_capacity_count"
);
}

// Checks the estimated number of entries to delete to reach the target capacity.
// Case arguments: (entry_count, total_entry_size_bytes, expected_to_delete_until_target).
// Expected values equal entry_count minus the estimated target count, or 0 when the entry
// count does not exceed that estimate.
#[rstest]
#[case::low_usage_1(10, 100, 0)]
#[case::low_usage_2(20, 100, 0)]
#[case::low_usage_3(50, 100, 0)]
#[case::mid_usage_1(10, 500, 0)]
#[case::mid_usage_2(25, 500, 0)]
#[case::mid_usage_3(50, 500, 0)]
#[case::between_target_and_full_1(10, 970, 1)]
#[case::between_target_and_full_2(20, 970, 1)]
#[case::between_target_and_full_3(50, 970, 2)]
#[case::between_target_and_full_4(100, 970, 3)]
#[case::above_full_1(10, 1050, 1)]
#[case::above_full_2(20, 1050, 2)]
#[case::above_full_3(50, 1050, 5)]
fn to_delete_until_target(
#[case] entry_count: u64,
#[case] total_entry_size_bytes: u64,
#[case] expected_to_delete_until_target: u64,
) {
let config = create_config();
let usage_stats = UsageStats::new(entry_count, total_entry_size_bytes);

assert_eq!(
config.estimate_to_delete_until_target(&usage_stats),
expected_to_delete_until_target
);
}
}
1 change: 1 addition & 0 deletions trin-storage/src/versioned/id_indexed_v1/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
mod config;
mod migration;
mod pruning_strategy;
pub(super) mod sql;
mod store;

Expand Down
Loading
Loading