Update for 0.1.4

slatedb · Aug 30, 2024 · e092b25 · e092b25
1 parent 96c813c
commit e092b25
Show file tree

Hide file tree

Showing 2 changed files with 162 additions and 64 deletions.
diff --git a/docs/configuration.md b/docs/configuration.md
@@ -10,8 +10,7 @@ sidebar_position: 3
 /// Configuration options for the database. These options are set on client startup.
 #[derive(Clone)]
 pub struct DbOptions {
-    /// How frequently to flush the write-ahead log to object storage (in
-    /// milliseconds).
+    /// How frequently to flush the write-ahead log to object storage.
     ///
     /// When setting this configuration, users must consider:
     ///
@@ -28,11 +27,15 @@ pub struct DbOptions {
     /// Keep in mind that the flush interval does not include the network latency. A
     /// 100ms flush interval will result in a 100ms + the time it takes to send the
     /// bytes to object storage.
-    pub flush_ms: usize,
+    pub flush_interval: Duration,
 
-    /// How frequently to poll for new manifest files (in milliseconds). Refreshing
-    /// the manifest file allows writers to detect fencing operations and allows
-    /// readers to detect newly compacted data.
+    /// If set to false, SlateDB will disable the WAL and write directly into the memtable.
+    #[cfg(feature = "wal_disable")]
+    pub wal_enabled: bool,
+
+    /// How frequently to poll for new manifest files. Refreshing the manifest file
+    /// allows writers to detect fencing operations and allows readers to detect newly
+    /// compacted data.
     ///
     /// **NOTE: SlateDB secondary readers (i.e. non-writer clients) do not currently
     /// read from the WAL. Such readers only read from L0+. The manifest poll intervals
@@ -46,18 +49,20 @@ pub struct DbOptions {
 
     /// The minimum size a memtable needs to be before it is frozen and flushed to
     /// L0 object storage. Writes will still be flushed to the object storage WAL
-    /// (based on flush_ms) regardless of this value. Memtable sizes are checked
-    /// every `flush_ms` milliseconds.
+    /// (based on `flush_interval`) regardless of this value. Memtable sizes are checked
+    /// every `flush_interval`.
     ///
     /// When setting this configuration, users must consider:
     ///
     /// * **Recovery time**: The larger the L0 SSTable size threshold, the less
     ///   frequently it will be written. As a result, the more recovery data there
     ///   will be in the WAL if a process restarts.
-    /// * **Number of L0 SSTs**: The smaller the L0 SSTable size threshold, the more
-    ///   L0 SSTables there will be. L0 SSTables are not range partitioned; each is its
-    ///   own sorted table. As such, reads that don't hit the WAL or memtable will need
-    ///   to scan all L0 SSTables. The more there are, the slower the scan will be.
+    /// * **Number of L0 SSTs/SRs**: The smaller the L0 SSTable size threshold, the
+    ///   more SSTs and Sorted Runs there will be. L0 SSTables are not range
+    ///   partitioned; each is its own sorted table. Similarly, each Sorted Run also
+    ///   stores the entire keyspace. As such, reads that don't hit the WAL or memtable
+    ///   may need to scan all L0 SSTables and Sorted Runs. The more there are, the
+    ///   slower the scan will be.
     /// * **Memory usage**: The larger the L0 SSTable size threshold, the larger the
     ///   unflushed in-memory memtable will grow. This shouldn't be a concern for most
     ///   workloads, but it's worth considering for workloads with very high L0
@@ -68,28 +73,148 @@ pub struct DbOptions {
     ///   writes; they don't see WAL writes. Thus, the higher the L0 SSTable size, the
     ///   less frequently they will be written, and the longer it will take for
     ///   secondary readers to see new data.
-    ///
-    /// We recommend setting this value to a size that will result in one L0 SSTable
-    /// per-second. With a default compaction interval of 5 seconds, this will result
-    /// in 4 or 5 L0 SSTables per compaction. Thus, a writer putting 10MiB/s of data
-    /// would configure this value to 10 * 1024 * 1024 = 10_485_760 bytes.
     pub l0_sst_size_bytes: usize,
 
+    /// Defines the max number of SSTs in L0. Memtables will not be flushed if there are more
+    /// L0 SSTs than this value, until compaction can compact the SSTs into Sorted Runs.
+    pub l0_max_ssts: usize,
+
+    /// Defines the max number of unflushed memtables. Writes will be paused if there
+    /// are more unflushed memtables than this value.
+    pub max_unflushed_memtable: usize,
+
     /// Configuration options for the compactor.
     pub compactor_options: Option<CompactorOptions>,
+    pub compression_codec: Option<CompressionCodec>,
 }
-```
 
-## Read Options
+impl Default for DbOptions {
+    fn default() -> Self {
+        Self {
+            flush_interval: Duration::from_millis(100),
+            #[cfg(feature = "wal_disable")]
+            wal_enabled: true,
+            manifest_poll_interval: Duration::from_secs(1),
+            min_filter_keys: 1000,
+            l0_sst_size_bytes: 64 * 1024 * 1024,
+            max_unflushed_memtable: 2,
+            l0_max_ssts: 8,
+            compactor_options: Some(CompactorOptions::default()),
+            compression_codec: None,
+        }
+    }
+}
 
-```rust
-/// Whether reads see data that's been written to object storage.
+/// The compression algorithm to use for SSTables.
+#[derive(Clone, Copy, Debug)]
+pub enum CompressionCodec {
+    #[cfg(feature = "snappy")]
+    /// Snappy compression algorithm.
+    Snappy,
+    #[cfg(feature = "zlib")]
+    /// Zlib compression algorithm.
+    Zlib,
+    #[cfg(feature = "lz4")]
+    /// Lz4 compression algorithm.
+    Lz4,
+    #[cfg(feature = "zstd")]
+    /// Zstd compression algorithm.
+    Zstd,
+}
+
+impl FromStr for CompressionCodec {
+    type Err = SlateDBError;
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        match s {
+            #[cfg(feature = "snappy")]
+            "snappy" => Ok(Self::Snappy),
+            #[cfg(feature = "zlib")]
+            "zlib" => Ok(Self::Zlib),
+            #[cfg(feature = "lz4")]
+            "lz4" => Ok(Self::Lz4),
+            #[cfg(feature = "zstd")]
+            "zstd" => Ok(Self::Zstd),
+            _ => Err(SlateDBError::InvalidCompressionCodec),
+        }
+    }
+}
+
+pub trait CompactionSchedulerSupplier: Send + Sync {
+    fn compaction_scheduler(&self) -> Box<dyn CompactionScheduler>;
+}
+
+/// Options for the compactor.
+#[derive(Clone)]
+pub struct CompactorOptions {
+    /// The interval at which the compactor checks for a new manifest and decides
+    /// if a compaction must be scheduled
+    pub poll_interval: Duration,
+
+    /// A compacted SSTable's maximum size (in bytes). If more data needs to be
+    /// written to a Sorted Run during a compaction, a new SSTable will be created
+    /// in the Sorted Run when this size is exceeded.
+    pub max_sst_size: usize,
+
+    /// Supplies the compaction scheduler to use to select the compactions that should be
+    /// scheduled. Currently, the only provided implementation is
+    /// `SizeTieredCompactionSchedulerSupplier`.
+    pub compaction_scheduler: Arc<dyn CompactionSchedulerSupplier>,
+
+    /// The maximum number of compactions to execute concurrently.
+    pub max_concurrent_compactions: usize,
+}
+
+/// Default options for the compactor. Currently, only a
+/// `SizeTieredCompactionScheduler` compaction strategy is implemented.
+impl Default for CompactorOptions {
+    /// Returns a `CompactorOptions` with a 5 second poll interval, a 1 GiB max SSTable
+    /// size, a size-tiered compaction scheduler, and at most 4 concurrent compactions.
+    fn default() -> Self {
+        Self {
+            poll_interval: Duration::from_secs(5),
+            max_sst_size: 1024 * 1024 * 1024,
+            compaction_scheduler: Arc::new(SizeTieredCompactionSchedulerSupplier::new(
+                SizeTieredCompactionSchedulerOptions::default(),
+            )),
+            max_concurrent_compactions: 4,
+        }
+    }
+}
+
+#[derive(Clone)]
+/// Options for the Size-Tiered Compaction Scheduler
+pub struct SizeTieredCompactionSchedulerOptions {
+    /// The minimum number of sources to include together in a single compaction step.
+    pub min_compaction_sources: usize,
+    /// The maximum number of sources to include together in a single compaction step.
+    pub max_compaction_sources: usize,
+    /// The size threshold that the scheduler will use to determine if a sorted run should
+    /// be included in a given compaction. A sorted run S will be added to a compaction C if S's
+    /// size is less than this value times the min size of the runs currently included in C.
+    pub include_size_threshold: f32,
+}
+
+impl SizeTieredCompactionSchedulerOptions {
+    pub const fn default() -> Self {
+        Self {
+            min_compaction_sources: 4,
+            max_compaction_sources: 8,
+            include_size_threshold: 4.0,
+        }
+    }
+}
+
+/// Whether reads see only writes that have been committed durably to the DB.  A
+/// write is considered durably committed if all future calls to read are guaranteed
+/// to serve the data written by the write, until some later durably committed write
+/// updates the same key.
 pub enum ReadLevel {
-    /// Client reads will only see data that's been written to object storage.
+    /// Client reads will only see data that's been committed durably to the DB.
     Commited,
 
-    /// Clients will see all writes, including those not yet written to object
-    /// storage.
+    /// Clients will see all writes, including those not yet durably committed to the
+    /// DB.
     Uncommitted,
 }
 
@@ -108,53 +233,23 @@ impl ReadOptions {
         }
     }
 }
-```
-
-## Write Options
 
-```rust
 /// Configuration for client write operations. `WriteOptions` is supplied for each
 /// write call and controls the behavior of the write.
+#[derive(Clone)]
 pub struct WriteOptions {
-    /// Whether `put` calls should block until the write has been written to
-    /// object storage.
-    pub await_flush: bool,
+    /// Whether `put` calls should block until the write has been durably committed
+    /// to the DB.
+    pub await_durable: bool,
 }
 
 impl WriteOptions {
-    /// Create a new `WriteOptions`` with `await_flush` set to `true`.
+    /// Create a new `WriteOptions` with `await_durable` set to `true`.
     const fn default() -> Self {
-        Self { await_flush: true }
-    }
-}
-```
-
-## Compactor Options
-
-```rust
-/// Options for the compactor.
-#[derive(Clone)]
-pub struct CompactorOptions {
-    /// The interval at which the compactor checks for a new manifest and decides
-    /// if a compaction must be scheduled
-    pub(crate) poll_interval: Duration,
-
-    /// A compacted SSTable's maximum size (in bytes). If more data needs to be
-    /// written during a compaction, a new SSTable will be created when this size
-    /// is exceeded.
-    pub(crate) max_sst_size: usize,
-}
-
-/// Default options for the compactor. Currently, only a
-/// `SizeTieredCompactionScheduler` compaction strategy is implemented.
-impl CompactorOptions {
-    /// Returns a `CompactorOptions` with a 5 second poll interval and a 1GB max
-    /// SSTable size.
-    pub const fn default() -> Self {
         Self {
-            poll_interval: Duration::from_secs(5),
-            max_sst_size: 1024 * 1024 * 1024,
+            await_durable: true,
         }
     }
 }
+
 ```
diff --git a/docs/quickstart.md b/docs/quickstart.md
@@ -31,11 +31,15 @@ async fn main() {
     // Setup
     let object_store: Arc<dyn ObjectStore> = Arc::new(InMemory::new());
     let options = DbOptions {
-        flush_ms: 100,
+        flush_interval: Duration::from_millis(100),
         manifest_poll_interval: Duration::from_millis(100),
+        #[cfg(feature = "wal_disable")] wal_enabled: true,
         min_filter_keys: 10,
         l0_sst_size_bytes: 128,
+        l0_max_ssts: 8,
+        max_unflushed_memtable: 2,
         compactor_options: Some(CompactorOptions::default()),
+        compression_codec: None,
     };
     let kv_store = Db::open_with_opts(
         Path::from("/tmp/test_kv_store"),
@@ -62,5 +66,4 @@ async fn main() {
 
     // Close
     kv_store.close().await.unwrap();
-}
-```
+}```