Skip to content

Commit

Permalink
Update for 0.1.4
Browse files Browse the repository at this point in the history
  • Loading branch information
criccomini committed Aug 30, 2024
1 parent 96c813c commit e092b25
Show file tree
Hide file tree
Showing 2 changed files with 162 additions and 64 deletions.
217 changes: 156 additions & 61 deletions docs/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,7 @@ sidebar_position: 3
/// Configuration options for the database. These options are set on client startup.
#[derive(Clone)]
pub struct DbOptions {
/// How frequently to flush the write-ahead log to object storage (in
/// milliseconds).
/// How frequently to flush the write-ahead log to object storage.
///
/// When setting this configuration, users must consider:
///
Expand All @@ -28,11 +27,15 @@ pub struct DbOptions {
/// Keep in mind that the flush interval does not include the network latency. A
/// 100ms flush interval will result in a 100ms + the time it takes to send the
/// bytes to object storage.
pub flush_ms: usize,
pub flush_interval: Duration,

/// How frequently to poll for new manifest files (in milliseconds). Refreshing
/// the manifest file allows writers to detect fencing operations and allows
/// readers to detect newly compacted data.
/// If set to `false`, SlateDB will disable the WAL and write directly into the memtable.
#[cfg(feature = "wal_disable")]
pub wal_enabled: bool,

/// How frequently to poll for new manifest files. Refreshing the manifest file
/// allows writers to detect fencing operations and allows readers to detect newly
/// compacted data.
///
/// **NOTE: SlateDB secondary readers (i.e. non-writer clients) do not currently
/// read from the WAL. Such readers only read from L0+. The manifest poll intervals
Expand All @@ -46,18 +49,20 @@ pub struct DbOptions {

/// The minimum size a memtable needs to be before it is frozen and flushed to
/// L0 object storage. Writes will still be flushed to the object storage WAL
/// (based on flush_ms) regardless of this value. Memtable sizes are checked
/// every `flush_ms` milliseconds.
/// (based on flush_interval) regardless of this value. Memtable sizes are checked
/// every `flush_interval`.
///
/// When setting this configuration, users must consider:
///
/// * **Recovery time**: The larger the L0 SSTable size threshold, the less
/// frequently it will be written. As a result, the more recovery data there
/// will be in the WAL if a process restarts.
/// * **Number of L0 SSTs**: The smaller the L0 SSTable size threshold, the more
/// L0 SSTables there will be. L0 SSTables are not range partitioned; each is its
/// own sorted table. As such, reads that don't hit the WAL or memtable will need
/// to scan all L0 SSTables. The more there are, the slower the scan will be.
/// * **Number of L0 SSTs/SRs**: The smaller the L0 SSTable size threshold, the
/// more SSTs and Sorted Runs there will be. L0 SSTables are not range
/// partitioned; each is its own sorted table. Similarly, each Sorted Run also
/// stores the entire keyspace. As such, reads that don't hit the WAL or memtable
/// may need to scan all L0 SSTables and Sorted Runs. The more there are, the
/// slower the scan will be.
/// * **Memory usage**: The larger the L0 SSTable size threshold, the larger the
/// unflushed in-memory memtable will grow. This shouldn't be a concern for most
/// workloads, but it's worth considering for workloads with very high L0
Expand All @@ -68,28 +73,148 @@ pub struct DbOptions {
/// writes; they don't see WAL writes. Thus, the higher the L0 SSTable size, the
/// less frequently they will be written, and the longer it will take for
/// secondary readers to see new data.
///
/// We recommend setting this value to a size that will result in one L0 SSTable
/// per-second. With a default compaction interval of 5 seconds, this will result
/// in 4 or 5 L0 SSTables per compaction. Thus, a writer putting 10MiB/s of data
/// would configure this value to 10 * 1024 * 1024 = 10_485_760 bytes.
pub l0_sst_size_bytes: usize,

/// Defines the max number of SSTs in L0. Memtables will not be flushed while there are
/// more L0 SSTs than this value, until the compactor has compacted enough L0 SSTs into
/// sorted runs.
pub l0_max_ssts: usize,

/// Defines the max number of unflushed memtables. Writes will be paused if there
/// are more unflushed memtables than this value
pub max_unflushed_memtable: usize,

/// Configuration options for the compactor.
pub compactor_options: Option<CompactorOptions>,
pub compression_codec: Option<CompressionCodec>,
}
```

## Read Options
impl Default for DbOptions {
    /// Returns the default database options: a 100 ms WAL flush interval, a 1 second
    /// manifest poll interval, a 1000-key minimum for filters, a 64 MiB L0 SSTable
    /// threshold, at most 8 L0 SSTs and 2 unflushed memtables, the default compactor
    /// options, and no compression. When the `wal_disable` feature is compiled in,
    /// the WAL is enabled by default.
    fn default() -> Self {
        // Computed values are bound first, in the same order the fields are listed.
        let flush_interval = Duration::from_millis(100);
        let manifest_poll_interval = Duration::from_secs(1);
        let compactor_options = Some(CompactorOptions::default());

        Self {
            flush_interval,
            #[cfg(feature = "wal_disable")]
            wal_enabled: true,
            manifest_poll_interval,
            min_filter_keys: 1000,
            l0_sst_size_bytes: 64 * 1024 * 1024,
            max_unflushed_memtable: 2,
            l0_max_ssts: 8,
            compactor_options,
            compression_codec: None,
        }
    }
}

```rust
/// Whether reads see data that's been written to object storage.
/// The compression algorithm to use for SSTables.
///
/// Each variant is compiled in only when the Cargo feature with the matching
/// lowercase name (`snappy`, `zlib`, `lz4`, `zstd`) is enabled, so the set of
/// available codecs depends on the features selected at build time.
#[derive(Clone, Copy, Debug)]
pub enum CompressionCodec {
#[cfg(feature = "snappy")]
/// Snappy compression algorithm.
Snappy,
#[cfg(feature = "zlib")]
/// Zlib compression algorithm.
Zlib,
#[cfg(feature = "lz4")]
/// Lz4 compression algorithm.
Lz4,
#[cfg(feature = "zstd")]
/// Zstd compression algorithm.
Zstd,
}

/// Parses a [`CompressionCodec`] from its lowercase name.
impl FromStr for CompressionCodec {
type Err = SlateDBError;

/// Returns the codec named by `s`. The comparison is exact, so only the lowercase
/// names `"snappy"`, `"zlib"`, `"lz4"` and `"zstd"` can match, and each of those
/// is recognized only when its feature is enabled in this build.
///
/// # Errors
///
/// Returns `SlateDBError::InvalidCompressionCodec` for any other input.
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s {
#[cfg(feature = "snappy")]
"snappy" => Ok(Self::Snappy),
#[cfg(feature = "zlib")]
"zlib" => Ok(Self::Zlib),
#[cfg(feature = "lz4")]
"lz4" => Ok(Self::Lz4),
#[cfg(feature = "zstd")]
"zstd" => Ok(Self::Zstd),
// Unknown names and names of codecs compiled out by feature flags both end up here.
_ => Err(SlateDBError::InvalidCompressionCodec),
}
}
}

/// Supplies the [`CompactionScheduler`] that the compactor should use.
/// Implementations must be `Send + Sync`.
pub trait CompactionSchedulerSupplier: Send + Sync {
/// Returns a boxed compaction scheduler.
fn compaction_scheduler(&self) -> Box<dyn CompactionScheduler>;
}

/// Options for the compactor.
#[derive(Clone)]
pub struct CompactorOptions {
/// The interval at which the compactor checks for a new manifest and decides
/// if a compaction must be scheduled.
pub poll_interval: Duration,

/// A compacted SSTable's maximum size (in bytes). If more data needs to be
/// written to a Sorted Run during a compaction, a new SSTable will be created
/// in the Sorted Run when this size is exceeded.
pub max_sst_size: usize,

/// Supplies the compaction scheduler to use to select the compactions that should be
/// scheduled. Currently, the only provided implementation is
/// `SizeTieredCompactionSchedulerSupplier`.
pub compaction_scheduler: Arc<dyn CompactionSchedulerSupplier>,

/// The maximum number of concurrent compactions to execute at once.
pub max_concurrent_compactions: usize,
}

/// Default options for the compactor. Currently, only a
/// `SizeTieredCompactionScheduler` compaction strategy is implemented.
impl Default for CompactorOptions {
/// Returns a `CompactorOptions` with a 5 second poll interval and a 1GB max
/// SSTable size.
fn default() -> Self {
Self {
poll_interval: Duration::from_secs(5),
max_sst_size: 1024 * 1024 * 1024,
compaction_scheduler: Arc::new(SizeTieredCompactionSchedulerSupplier::new(
SizeTieredCompactionSchedulerOptions::default(),
)),
max_concurrent_compactions: 4,
}
}
}

/// Options for the Size-Tiered Compaction Scheduler.
#[derive(Clone)]
pub struct SizeTieredCompactionSchedulerOptions {
    /// The minimum number of sources to include together in a single compaction step.
    pub min_compaction_sources: usize,
    /// The maximum number of sources to include together in a single compaction step.
    pub max_compaction_sources: usize,
    /// The size threshold that the scheduler will use to determine if a sorted run should
    /// be included in a given compaction. A sorted run S will be added to a compaction C if S's
    /// size is less than this value times the min size of the runs currently included in C.
    pub include_size_threshold: f32,
}

impl SizeTieredCompactionSchedulerOptions {
    /// Returns the default options: between 4 and 8 sources per compaction step and
    /// an include-size threshold of 4.0.
    ///
    /// This stays a `const fn` so the defaults remain usable in constant contexts.
    pub const fn default() -> Self {
        Self {
            min_compaction_sources: 4,
            max_compaction_sources: 8,
            include_size_threshold: 4.0,
        }
    }
}

/// Makes the same defaults available through the standard `Default` trait, so the
/// type works with `T: Default` bounds and `..Default::default()` struct updates.
impl Default for SizeTieredCompactionSchedulerOptions {
    fn default() -> Self {
        // Inherent associated functions take precedence over trait methods in path
        // resolution, so this calls the `const fn` above and does not recurse.
        SizeTieredCompactionSchedulerOptions::default()
    }
}

/// Whether reads see only writes that have been committed durably to the DB. A
/// write is considered durably committed if all future calls to read are guaranteed
/// to serve the data written by the write, until some later durably committed write
/// updates the same key.
pub enum ReadLevel {
/// Client reads will only see data that's been written to object storage.
/// Client reads will only see data that's been committed durably to the DB.
Commited,

/// Clients will see all writes, including those not yet written to object
/// storage.
/// Clients will see all writes, including those not yet durably committed to the
/// DB.
Uncommitted,
}

Expand All @@ -108,53 +233,23 @@ impl ReadOptions {
}
}
}
```

## Write Options

```rust
/// Configuration for client write operations. `WriteOptions` is supplied for each
/// write call and controls the behavior of the write.
#[derive(Clone)]
pub struct WriteOptions {
/// Whether `put` calls should block until the write has been written to
/// object storage.
pub await_flush: bool,
/// Whether `put` calls should block until the write has been durably committed
/// to the DB.
pub await_durable: bool,
}

impl WriteOptions {
/// Create a new `WriteOptions` with `await_flush` set to `true`.
/// Create a new `WriteOptions` with `await_durable` set to `true`.
const fn default() -> Self {
Self { await_flush: true }
}
}
```

## Compactor Options

```rust
/// Options for the compactor.
#[derive(Clone)]
pub struct CompactorOptions {
/// The interval at which the compactor checks for a new manifest and decides
/// if a compaction must be scheduled
pub(crate) poll_interval: Duration,

/// A compacted SSTable's maximum size (in bytes). If more data needs to be
/// written during a compaction, a new SSTable will be created when this size
/// is exceeded.
pub(crate) max_sst_size: usize,
}

/// Default options for the compactor. Currently, only a
/// `SizeTieredCompactionScheduler` compaction strategy is implemented.
impl CompactorOptions {
/// Returns a `CompactorOptions` with a 5 second poll interval and a 1GB max
/// SSTable size.
pub const fn default() -> Self {
Self {
poll_interval: Duration::from_secs(5),
max_sst_size: 1024 * 1024 * 1024,
await_durable: true,
}
}
}

```
9 changes: 6 additions & 3 deletions docs/quickstart.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,15 @@ async fn main() {
// Setup
let object_store: Arc<dyn ObjectStore> = Arc::new(InMemory::new());
let options = DbOptions {
flush_ms: 100,
flush_interval: Duration::from_millis(100),
manifest_poll_interval: Duration::from_millis(100),
#[cfg(feature = "wal_disable")] wal_enabled: true,
min_filter_keys: 10,
l0_sst_size_bytes: 128,
l0_max_ssts: 8,
max_unflushed_memtable: 2,
compactor_options: Some(CompactorOptions::default()),
compression_codec: None,
};
let kv_store = Db::open_with_opts(
Path::from("/tmp/test_kv_store"),
Expand All @@ -62,5 +66,4 @@ async fn main() {

// Close
kv_store.close().await.unwrap();
}
```
}
```

0 comments on commit e092b25

Please sign in to comment.