diff --git a/core/src/lib.rs b/core/src/lib.rs index 3542cca..3c73511 100644 --- a/core/src/lib.rs +++ b/core/src/lib.rs @@ -14,7 +14,7 @@ #![feature(unboxed_closures)] #![feature(unsize)] -mod error; +pub mod error; mod layers; mod os; mod prelude; @@ -22,3 +22,7 @@ mod tx; mod util; extern crate alloc; + +pub use layers::bio::{BlockId, BlockSet, Buf, BufMut, BufRef, BLOCK_SIZE}; +#[cfg(feature = "linux")] +pub use os::{Arc, Mutex, Vec}; diff --git a/linux/Makefile b/linux/Makefile index bee13cb..330a957 100644 --- a/linux/Makefile +++ b/linux/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -KDIR ?= +KDIR ?= /home/qingsong/workspace/rust-for-linux ifndef KDIR $(error The kernel source directory (KDIR) is not defined) endif diff --git a/linux/bindings/src/bindings.h b/linux/bindings/src/bindings.h index 678b701..2494bb0 100644 --- a/linux/bindings/src/bindings.h +++ b/linux/bindings/src/bindings.h @@ -8,7 +8,9 @@ * Sorted alphabetically. */ +#include #include +#include #include #include #include diff --git a/linux/bindings/src/helpers.c b/linux/bindings/src/helpers.c index 55b7e26..72f0f4b 100644 --- a/linux/bindings/src/helpers.c +++ b/linux/bindings/src/helpers.c @@ -21,6 +21,7 @@ */ #include +#include #include #include @@ -58,3 +59,28 @@ void helper_put_task_struct(struct task_struct *t) { put_task_struct(t); } + +void helper_bio_get(struct bio *bio) +{ + bio_get(bio); +} + +void helper_bio_set_dev(struct bio *bio, struct block_device *bdev) +{ + bio_set_dev(bio, bdev); +} + +bool helper_bio_has_data(struct bio *bio) +{ + return bio_has_data(bio); +} + +struct page *helper_virt_to_page(void *addr) +{ + return virt_to_page(addr); +} + +void *helper_page_to_virt(struct page *page) +{ + return page_to_virt(page); +} diff --git a/linux/dm-sworndisk/src/bio.rs b/linux/dm-sworndisk/src/bio.rs new file mode 100644 index 0000000..00cc92d --- /dev/null +++ b/linux/dm-sworndisk/src/bio.rs @@ -0,0 +1,413 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 
2023 Ant Group CO., Ltd. + +//! Block I/O. + +use core::{ + ops::{Deref, DerefMut, Range}, + ptr::NonNull, +}; +use kernel::{prelude::*, types::Opaque}; + +use super::block_device::{BlockDevice, BLOCK_SECTORS, BLOCK_SIZE}; + +const GFP_NOIO: bindings::gfp_t = bindings::___GFP_DIRECT_RECLAIM | bindings::___GFP_KSWAPD_RECLAIM; + +/// A wrapper for kernel's struct `bio`. +pub struct Bio { + inner: NonNull, +} + +// SAFETY: we get a reference to kernel's `bio` by `bio_get`, so it won't disappear +// and can be safely transferred across threads. +unsafe impl Send for Bio {} + +impl Bio { + /// Constructs a `Bio` from raw pointer. + /// + /// # Safety + /// + /// User must provide a valid pointer to the kernel's `bio`. + pub unsafe fn from_raw(ptr: *mut bindings::bio) -> Self { + // Get a reference to a `bio`, so it won't disappear. And `bio_put` is + // called when it's been dropped. + bindings::bio_get(ptr); + Self { + inner: NonNull::new_unchecked(ptr), + } + } + + /// Allocates a bio for the target `BlockDevice`. + /// + /// The bio operation is specified by `op`, targeting a given block region, + /// while its I/O buffer is configured by `BioVec`. + pub fn alloc( + bdev: &BlockDevice, + op: BioOp, + region: Range, + io_buffer: &[BioVec], + ) -> Result { + // SAFETY: no safety requirements on this FFI call. + let bio = unsafe { + bindings::bio_alloc_bioset( + bdev.as_ptr(), + io_buffer.len() as _, + op as _, + GFP_NOIO, + &mut bindings::fs_bio_set, + ) + }; + if bio.is_null() { + return Err(ENOMEM); + } + + let mut buf_len = 0usize; + for bio_vec in io_buffer { + // SAFETY: the `bio` is non-null and valid (checked above), and `bio_vecs` + // contains valid pages. 
+ unsafe { + let page = bindings::virt_to_page(bio_vec.ptr as _); + bindings::__bio_add_page(bio, page, bio_vec.len as _, bio_vec.offset as _); + buf_len += bio_vec.len; + } + } + + let start_sector = region.start * BLOCK_SECTORS; + let nr_bytes = region.len() * BLOCK_SIZE; + if nr_bytes != buf_len { + // SAFETY: the `bio` is non-null and valid. And the 'bio_alloc_bioset' + // method sets the reference count, so we call `bio_put` to drop it here. + unsafe { bindings::bio_put(bio) }; + return Err(EINVAL); + } + // SAFETY: the `bio` is non-null and valid. + unsafe { + (*bio).bi_iter.bi_sector = start_sector as _; + (*bio).bi_iter.bi_size = nr_bytes as _; + } + + // SAFETY: the `bio` is non-null and valid, and the 'bio_alloc_bioset' method + // sets the reference count, so we don't need to call `bio_get` here. + Ok(Self { + inner: unsafe { NonNull::new_unchecked(bio) }, + }) + } + + // Clones a bio that shares the original bio's I/O buffer. + pub fn alloc_clone(&self) -> Result { + let src = self.inner.as_ptr(); + // SAFETY: the `src.bi_bdev` is a valid `block_device`. + let cloned = unsafe { + bindings::bio_alloc_clone((*src).bi_bdev, src, GFP_NOIO, &mut bindings::fs_bio_set) + }; + if cloned.is_null() { + return Err(ENOMEM); + } + + // SAFETY: the `cloned` is non-null and valid, and the 'bio_alloc_clone' method + // sets the reference count, so we don't need to call `bio_get` here. + Ok(Self { + inner: unsafe { NonNull::new_unchecked(cloned) }, + }) + } + + /// Gets the operation and flags. The bottom 8 bits are encoding the operation, + /// and the remaining 24 for flags. + pub fn opf(&self) -> u32 { + // SAFETY: `self.inner` is a valid bio. + unsafe { (*self.inner.as_ptr()).bi_opf } + } + + /// Sets the operation and flags. + pub fn set_opf(&mut self, opf: u32) { + // SAFETY: `self.inner` is a valid bio. + unsafe { (*self.inner.as_ptr()).bi_opf = opf } + } + + /// Returns the operation of the `Bio`. 
+ pub fn op(&self) -> BioOp { + BioOp::from(self.opf() as u8) + } + + /// Returns true if this bio carries any data. + pub fn has_data(&self) -> bool { + // SAFETY: `self.inner` is a valid bio. + unsafe { bindings::bio_has_data(self.inner.as_ptr()) } + } + + /// Sets the block device of bio request (remap the request). + pub fn set_dev(&mut self, bdev: &BlockDevice) { + // SAFETY: `self.inner` is a valid bio. + unsafe { bindings::bio_set_dev(self.inner.as_ptr(), bdev.as_ptr()) } + } + + /// Returns the start sector of the bio. + pub fn start_sector(&self) -> usize { + // SAFETY: `self.inner` is a valid bio. + unsafe { (*self.inner.as_ptr()).bi_iter.bi_sector as _ } + } + + /// Sets the start sector of the bio (remap the request). + pub fn set_start_sector(&mut self, sector: usize) { + // SAFETY: `self.inner` is a valid bio. + unsafe { (*self.inner.as_ptr()).bi_iter.bi_sector = sector as _ } + } + + /// Returns the length in bytes of the bio. + pub fn len(&self) -> usize { + // SAFETY: `self.inner` is a valid bio. + unsafe { (*self.inner.as_ptr()).bi_iter.bi_size as _ } + } + + unsafe extern "C" fn bi_end_io(bio: *mut bindings::bio) + where + F: FnOnce(&mut Bio, T), + F: Send, + T: Send, + { + let mut item = Box::from_raw((*bio).bi_private as *mut CallbackItem); + (item.func)(&mut item.base, item.data); + item.base.end(); + } + + /// Set the callback that will be called through the `bi_end_io` method. + pub fn set_callback(self, data: T, func: F) -> Result + where + F: FnOnce(&mut Bio, T) + 'static, + F: Send, + T: Send, + { + let cloned = self.alloc_clone()?; + let item = CallbackItem::try_new(self, data, func)?; + let bio = cloned.inner.as_ptr(); + + // SAFETY: `cloned.inner` is a valid bio. + unsafe { + (*bio).bi_private = Box::into_raw(item) as _; + (*bio).bi_end_io = Some(Bio::bi_end_io::); + } + Ok(cloned) + } + + /// Submits the bio. 
+ /// + /// # Caller requirements + /// + /// User should ensure that this is an unaltered `from_raw` bio, + /// or an owned bio returned by `set_callback`. + /// + /// The success/failure status of the request, along with notification of + /// completion, is delivered asynchronously through the `bi_end_io` callback + /// in bio. The bio must NOT be touched by the caller until ->bi_end_io() + /// has been called. + pub fn submit(&self) { + // SAFETY: `self.inner` is a valid bio. + unsafe { bindings::submit_bio(self.inner.as_ptr()) } + } + + /// Submits a bio (synchronously). + /// + /// # Caller requirements + /// + /// User must ensure that this is an owned bio without a `bi_end_io` callback, + /// either one you have gotten with `alloc`, or `alloc_clone`. + pub fn submit_sync(&self) -> Result<()> { + // SAFETY: `self.inner` is a valid bio. + let err = unsafe { bindings::submit_bio_wait(self.inner.as_ptr()) }; + kernel::error::to_result(err) + } + + /// Ends the bio. + /// + /// This will end I/O on the whole bio. No one should call bi_end_io() + /// directly on a kernel's `bio` unless they own it and thus know that + /// it has an end_io function. + pub fn end(&self) { + // SAFETY: `self.inner` is a valid bio. + unsafe { bindings::bio_endio(self.inner.as_ptr()) } + } + + /// Returns an iterator on the bio_vec. + pub fn iter(&self) -> BioVecIter { + let bio = self.inner.as_ptr(); + // SAFETY: `self.inner` is a valid bio. + unsafe { + let vec = (*bio).bi_io_vec; + let iter = core::ptr::addr_of_mut!((*bio).bi_iter); + BioVecIter::from_raw(vec, iter) + } + } +} + +impl Drop for Bio { + fn drop(&mut self) { + // SAFETY: `self.inner` is a valid bio. + unsafe { + // Put a reference to a kernel's `bio`, either one you have gotten + // with `alloc`, `bio_get` or `alloc_clone`. The last put of a bio + // will free it. + bindings::bio_put(self.inner.as_ptr()); + } + } +} + +/// An iterator on `BioVec`. 
+pub struct BioVecIter { + iter: Opaque, + vec: NonNull, +} + +impl BioVecIter { + /// Constructs a `BioVecIter` from raw pointers. + /// + /// # Safety + /// + /// Caller must provide valid pointers to the `bvec_iter` and `bio_vec` + /// of a valid `bio`. + pub unsafe fn from_raw(vec: *mut bindings::bio_vec, iter: *mut bindings::bvec_iter) -> Self { + let opaque = Opaque::new(bindings::bvec_iter::default()); + *opaque.get() = *iter; + Self { + iter: opaque, + vec: NonNull::new_unchecked(vec), + } + } + + /// Returns true if the iterator has next `BioVec`. + fn has_next(&self) -> bool { + unsafe { (*self.iter.get()).bi_size > 0 } + } + + /// Returns the current item of the iterator. + fn item(&self) -> BioVec { + unsafe { + let expect_len = (*self.iter.get()).bi_size as usize; + let idx = (*self.iter.get()).bi_idx as usize; + let next = self.vec.as_ptr().add(idx); + BioVec::from_bio_vec(next, expect_len) + } + } + + /// Advances the iterator with current `BioVec`. + fn advance_by(&mut self, vec: &BioVec) { + unsafe { + (*self.iter.get()).bi_size -= vec.len() as u32; + (*self.iter.get()).bi_idx += 1; + } + } +} + +impl Iterator for BioVecIter { + type Item = BioVec; + + fn next(&mut self) -> Option { + if !self.has_next() { + return None; + } + let item = self.item(); + self.advance_by(&item); + Some(item) + } +} + +/// A contiguous range of physical memory addresses. +/// +/// Used to iterate the I/O buffer in `Bio`. +pub struct BioVec { + ptr: usize, + offset: usize, + len: usize, +} + +impl BioVec { + /// Constructs a `BioVec` from kernel's `bio_vec`. + /// + /// # Safety + /// + /// User must provide a valid pointer to kernel's `bio_vec`. + pub unsafe fn from_bio_vec(vec: *mut bindings::bio_vec, expect_len: usize) -> Self { + Self { + ptr: bindings::page_to_virt((*vec).bv_page) as _, + offset: (*vec).bv_offset as _, + len: expect_len.min((*vec).bv_len as _), + } + } + + /// Constructs a `BioVec` from raw pointer. 
+ /// + /// # Safety + /// + /// User must ensure that `ptr` points to a valid and directly mapped page. + pub unsafe fn from_ptr(ptr: usize, offset: usize, len: usize) -> Self { + Self { ptr, offset, len } + } + + /// Returns the length of `BioVec`. + pub fn len(&self) -> usize { + self.len + } +} + +impl Deref for BioVec { + type Target = [u8]; + fn deref(&self) -> &Self::Target { + unsafe { + let ptr = (self.ptr as *mut u8).add(self.offset); + core::slice::from_raw_parts(ptr, self.len) + } + } +} + +impl DerefMut for BioVec { + fn deref_mut(&mut self) -> &mut Self::Target { + unsafe { + let ptr = (self.ptr as *mut u8).add(self.offset); + core::slice::from_raw_parts_mut(ptr, self.len) + } + } +} + +/// Wrap the bio operations (see [req_op]). +/// +/// [`req_op`]: include/linux/blk_types.h +#[allow(missing_docs)] +#[repr(u8)] +#[derive(Clone, Copy, Debug)] +pub enum BioOp { + Read, + Write, + Flush, + Discard, + Undefined, +} + +impl From for BioOp { + fn from(value: u8) -> Self { + match value { + 0 => Self::Read, + 1 => Self::Write, + 2 => Self::Flush, + 3 => Self::Discard, + _ => Self::Undefined, + } + } +} + +/// A struct to carry the data for `bi_end_io`. +struct CallbackItem { + base: Bio, + data: T, + func: Box, +} + +impl CallbackItem { + pub fn try_new(base: Bio, data: T, func: F) -> Result> + where + F: FnOnce(&mut Bio, T) + 'static, + F: Send, + T: Send, + { + let func = Box::try_new(func)?; + Ok(Box::try_new(Self { base, data, func })?) + } +} diff --git a/linux/dm-sworndisk/src/block_device.rs b/linux/dm-sworndisk/src/block_device.rs new file mode 100644 index 0000000..e1dec43 --- /dev/null +++ b/linux/dm-sworndisk/src/block_device.rs @@ -0,0 +1,118 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2023 Ant Group CO., Ltd. + +//! Block device. 
+
+use core::{ops::Range, ptr::NonNull};
+use kernel::{prelude::*, types::Opaque};
+
+/// Size in bytes of one block (one kernel page, `1 << PAGE_SHIFT`).
+pub const BLOCK_SIZE: usize = 1 << bindings::PAGE_SHIFT;
+/// Number of sectors in one block (the kernel's `PAGE_SECTORS`).
+pub const BLOCK_SECTORS: usize = bindings::PAGE_SECTORS as usize;
+
+/// A wrapper for kernel's struct `block_device`.
+///
+/// The device is obtained by [`BlockDevice::open`] and handed back to the
+/// kernel with `blkdev_put` when the wrapper is dropped.
+pub struct BlockDevice {
+    inner: NonNull<bindings::block_device>,
+}
+
+// See definitions at `include/linux/blkdev.h`.
+const BLK_OPEN_READ: bindings::blk_mode_t = 1 << 0;
+const BLK_OPEN_WRITE: bindings::blk_mode_t = 1 << 1;
+
+impl BlockDevice {
+    /// Constructs a `BlockDevice` by path.
+    ///
+    /// The device is opened for reading and writing, without a holder.
+    ///
+    /// # Errors
+    ///
+    /// Returns `ENODEV` if the kernel reports an error for `path` (the
+    /// specific error code is not propagated) or returns a null pointer.
+    pub fn open(path: &CStr) -> Result<Self> {
+        // SAFETY: `path` is a valid, NUL-terminated C string for the duration
+        // of the call; the holder and holder-ops arguments are null.
+        let dev_ptr = unsafe {
+            bindings::blkdev_get_by_path(
+                path.as_char_ptr(),
+                BLK_OPEN_READ | BLK_OPEN_WRITE,
+                core::ptr::null_mut(),
+                core::ptr::null(),
+            )
+        };
+        // SAFETY: `IS_ERR` only inspects the pointer value.
+        if unsafe { kernel::bindings::IS_ERR(dev_ptr as _) } {
+            return Err(ENODEV);
+        }
+        // Reject a null pointer too, so `inner` is non-null without relying on
+        // an unchecked constructor.
+        let inner = NonNull::new(dev_ptr).ok_or(ENODEV)?;
+        Ok(Self { inner })
+    }
+
+    /// Returns the raw pointer to kernel's struct `block_device`.
+    pub fn as_ptr(&self) -> *mut bindings::block_device {
+        self.inner.as_ptr()
+    }
+
+    /// Returns the block range of the `block_device`.
+    ///
+    /// The range is derived from `bd_start_sect` and `bd_nr_sectors`: the
+    /// start is rounded up and the end rounded down to whole blocks of
+    /// [`BLOCK_SECTORS`] sectors.
+    pub fn region(&self) -> Range<usize> {
+        // SAFETY: `self.inner` points to the `block_device` obtained in
+        // `open`, which is only released in `drop`.
+        let (start_sector, nr_sectors) = unsafe {
+            let bdev = self.as_ptr();
+            (
+                (*bdev).bd_start_sect as usize,
+                (*bdev).bd_nr_sectors as usize,
+            )
+        };
+        let end_sector = start_sector + nr_sectors;
+
+        // NOTE(review): bio sectors are normally relative to the device the
+        // bio targets. If callers use this range to address bios on a
+        // partition, the `bd_start_sect` offset may be applied twice -- confirm.
+        Range {
+            start: (start_sector + BLOCK_SECTORS - 1) / BLOCK_SECTORS,
+            end: end_sector / BLOCK_SECTORS,
+        }
+    }
+}
+
+impl Drop for BlockDevice {
+    fn drop(&mut self) {
+        // SAFETY: the pointer was returned by `blkdev_get_by_path` in `open`
+        // with a null holder, and this is the only place that releases it.
+        unsafe { bindings::blkdev_put(self.as_ptr(), core::ptr::null_mut()) }
+    }
+}
+
+/// Wrap the block error status values (see [`blk_status_t`]).
+/// +/// [`blk_status_t`]: include/linux/blk_types.h +#[allow(missing_docs)] +#[repr(u32)] +#[derive(Clone, Copy, Debug)] +pub enum BlkStatus { + Ok, + NotSupp, + TimeOut, + NoSpc, + Transport, + Target, + Nexus, + Medium, + Protection, + Resource, + IoErr, + DmRequeue, + Again, + DevResource, + ZoneResource, + ZoneOpenResource, + ZoneActiveResource, + Offline, + Undefined, +} + +impl From for BlkStatus { + fn from(value: u32) -> Self { + match value { + 0 => Self::Ok, + 1 => Self::NotSupp, + 2 => Self::TimeOut, + 3 => Self::NoSpc, + 4 => Self::Transport, + 5 => Self::Target, + 6 => Self::Nexus, + 7 => Self::Medium, + 8 => Self::Protection, + 9 => Self::Resource, + 10 => Self::IoErr, + 11 => Self::DmRequeue, + 12 => Self::Again, + 13 => Self::DevResource, + 14 => Self::ZoneResource, + 15 => Self::ZoneOpenResource, + 16 => Self::ZoneActiveResource, + 17 => Self::Offline, + _ => Self::Undefined, + } + } +} diff --git a/linux/dm-sworndisk/src/device_mapper.rs b/linux/dm-sworndisk/src/device_mapper.rs new file mode 100644 index 0000000..91b604d --- /dev/null +++ b/linux/dm-sworndisk/src/device_mapper.rs @@ -0,0 +1,370 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2023 Ant Group CO., Ltd. + +//! Device mapper. + +use core::{ + marker::PhantomData, + ops::{Index, Range}, + ptr::NonNull, +}; +use kernel::{error::to_result, prelude::*, str::CStr, types::Opaque}; + +use super::bio::Bio; +use super::block_device::{BlkStatus, BlockDevice, BLOCK_SECTORS, BLOCK_SIZE}; + +/// A trait declares operations that a device mapper target can do. +#[vtable] +pub trait TargetOperations: Sized { + /// Persist user data. + type Private: Sync; + + /// Constructor. The target will already have the table, type, begin and + /// len fields filled in. A `Private` struct can be returned to persist + /// its own context. + fn ctr(t: &mut Target, args: Args) -> Result>; + + /// Destructor. 
The target could clean up anything hidden in `Private`, + /// and `Private` itself can be dropped automatically. + fn dtr(t: &mut Target); + + /// Map block IOs. Return [`MapState`] to indicate how to handle the `bio` + /// later (end or resubmit). + fn map(t: &Target, bio: Bio) -> MapState; + + /// End the `bio`. Return [`EndState`] and [`BlkStatus`]. + #[allow(unused)] + fn end_io(t: &Target, bio: Bio) -> (EndState, BlkStatus) { + unreachable!() + } +} + +/// Wrap the kernel struct `target_type`. +/// +/// It contains a struct `list_head` for internal device-mapper use, so it +/// should be pinned. Users can use this struct to register/unregister their +/// own device mapper target. +#[pin_data(PinnedDrop)] +pub struct TargetType { + #[pin] + opaque: Opaque, +} + +/// Define target feature type, see `include/linux/device-mapper.h`. +pub type Features = u64; + +// SAFETY: It's OK to access `TargetType` from multiple threads. The +// `dm_register_target` and `dm_unregister_target` provides its own +// synchronization. +unsafe impl Sync for TargetType {} + +macro_rules! binding_target_operations { + ($target:expr, $(($op:ident, $method:ident, $func:ident),)+) => {$( + if ::$op { + (*$target).$method = Some(TargetType::$func::); + } + )+}; +} + +impl TargetType { + /// Provide an in-place constructor to register a new device mapper target. + pub fn register( + name: &'static CStr, + version: [u32; 3], + features: Features, + ) -> impl PinInit { + // SAFETY: `slot` is valid while the closure is called. + unsafe { + init::pin_init_from_closure(move |slot: *mut Self| { + // The `slot` contains uninit memory. Avoid creating a reference. 
+ let opaque = core::ptr::addr_of!((*slot).opaque); + let target = Opaque::raw_get(opaque); + + (*target).module = &mut bindings::__this_module as _; + (*target).name = name.as_char_ptr(); + (*target).version = version; + (*target).features = features; + + binding_target_operations!( + target, + (HAS_CTR, ctr, dm_ctr_fn), + (HAS_DTR, dtr, dm_dtr_fn), + (HAS_MAP, map, dm_map_fn), + (HAS_END_IO, end_io, dm_endio_fn), + ); + + to_result(bindings::dm_register_target(target)) + }) + } + } +} + +#[pinned_drop] +impl PinnedDrop for TargetType { + fn drop(self: Pin<&mut Self>) { + // SAFETY: `self.opaque` are initialized by the `register` constructor, + // so it's valid. + unsafe { bindings::dm_unregister_target(self.opaque.get()) }; + } +} + +impl TargetType { + unsafe extern "C" fn dm_ctr_fn( + ti: *mut bindings::dm_target, + argc: core::ffi::c_uint, + argv: *mut *mut core::ffi::c_char, + ) -> core::ffi::c_int { + // SAFETY: the kernel splits arguments by `dm_split_args`, then pass + // suitable `argc` and `argv` to `dm_ctr_fn`. If `argc` is not zero, + // `argv` is non-null and valid. + let args = unsafe { Args::new(argc, argv) }; + + // SAFETY: the kernel should pass a valid `dm_target`. + let target = unsafe { Target::borrow_mut(ti) }; + T::ctr(target, args).map_or_else( + |e| e.to_errno(), + // SAFETY: the kernel should pass a valid `dm_target`. + |p| unsafe { + (*ti).private = Box::into_raw(p) as _; + 0 + }, + ) + } + + unsafe extern "C" fn dm_dtr_fn(ti: *mut bindings::dm_target) { + // SAFETY: the kernel should pass a valid `dm_target`. + let target = unsafe { Target::borrow_mut(ti) }; + T::dtr(target); + // SAFETY: `private` is constructed in `dm_ctr_fn`, and we drop it here. 
+ unsafe { + let private = (*ti).private as *mut T::Private; + drop(Box::from_raw(private)); + (*ti).private = core::ptr::null_mut(); + } + } + + unsafe extern "C" fn dm_map_fn( + ti: *mut bindings::dm_target, + bio: *mut bindings::bio, + ) -> core::ffi::c_int { + // SAFETY: the kernel should pass a valid `dm_target` and `bio`. + unsafe { + let target = Target::borrow(ti); + let bio = Bio::from_raw(bio); + T::map(target, bio) as _ + } + } + + unsafe extern "C" fn dm_endio_fn( + ti: *mut bindings::dm_target, + bio: *mut bindings::bio, + error: *mut bindings::blk_status_t, + ) -> core::ffi::c_int { + // SAFETY: the kernel should pass valid `dm_target`, `bio` and + // `error` pointers. + unsafe { + let target = Target::borrow(ti); + let bio = Bio::from_raw(bio); + let (end_state, blk_status) = T::end_io(target, bio); + *error = blk_status as _; + end_state as _ + } + } +} + +/// Wrap the kernel struct `dm_target`. +/// +/// This struct represents a device mapper target. And the device mapper +/// core will alloc/free `dm_target` instances, so we just `borrow` it. +/// It also holds a `Private` struct, which is used to persist user's data, +/// and can be accessed by the `private` method. +pub struct Target { + opaque: Opaque, + _p: PhantomData<*mut T::Private>, +} + +impl Target { + /// Borrows the instance from a foreign pointer immutably. + /// + /// # Safety + /// + /// User must provide a valid pointer to the kernel's `dm_target`. + unsafe fn borrow<'a>(ptr: *const bindings::dm_target) -> &'a Self { + &*(ptr as *const Self) + } + + /// Borrows the instance from a foreign pointer mutably. + /// + /// # Safety + /// + /// User must provide a valid pointer to the kernel's `dm_target`. + unsafe fn borrow_mut<'a>(ptr: *mut bindings::dm_target) -> &'a mut Self { + &mut *(ptr as *mut Self) + } + + /// Access user's private data. 
+ pub fn private(&self) -> Option<&T::Private> { + let target = self.opaque.get(); + // SAFETY: `self.opaque` is borrowed from foreign pointer, should be valid. + // And '(*target).private' is assigned in `dm_ctr_fn`, it's also valid. + unsafe { ((*target).private as *const T::Private).as_ref() } + } + + /// Return the target name. + pub fn name(&self) -> &CStr { + // SAFETY: `self.opaque` is borrowed from foreign pointer, should be valid. + unsafe { + let name = (*(*self.opaque.get()).type_).name; + CStr::from_char_ptr(name) + } + } + + /// Return the target version. + pub fn version(&self) -> [u32; 3] { + // SAFETY: `self.opaque` is borrowed from foreign pointer, should be valid. + unsafe { (*(*self.opaque.get()).type_).version } + } + + /// Return the block range of the device mapper target. + pub fn region(&self) -> Range { + // SAFETY: `self.opaque` is borrowed from foreign pointer, should be valid. + let start_sector = unsafe { (*self.opaque.get()).begin as usize }; + let nr_sectors = unsafe { (*self.opaque.get()).len as usize }; + let end_sector = start_sector + nr_sectors; + let sectors_per_block = BLOCK_SIZE / (bindings::SECTOR_SIZE as usize); + + Range { + start: (start_sector + sectors_per_block - 1) / sectors_per_block, + end: end_sector / sectors_per_block, + } + } + + /// Set the block range of the device mapper target. + pub fn set_region(&mut self, blocks: Range) { + let start_sector = blocks.start * BLOCK_SECTORS; + let nr_sectors = blocks.len() * BLOCK_SECTORS; + // SAFETY: `self.opaque` is borrowed from foreign pointer, should be valid. + unsafe { + (*self.opaque.get()).begin = start_sector as _; + (*self.opaque.get()).len = nr_sectors as _; + } + } + + /// Return the number of zero-length barrier bios that will be submitted + /// to the target for the purpose of flushing cache. + pub fn num_flush_bios(&self) -> usize { + // SAFETY: `self.opaque` is borrowed from foreign pointer, should be valid. 
+ unsafe { (*self.opaque.get()).num_flush_bios as _ } + } + + /// Set the number of zero-length barrier bios that will be submitted + /// to the target for the purpose of flushing cache. + pub fn set_num_flush_bios(&mut self, num: usize) { + // SAFETY: `self.opaque` is borrowed from foreign pointer, should be valid. + unsafe { (*self.opaque.get()).num_flush_bios = num as _ }; + } + + /// Return the number of discard bios. + pub fn num_discard_bios(&self) -> usize { + // SAFETY: `self.opaque` is borrowed from foreign pointer, should be valid. + unsafe { (*self.opaque.get()).num_discard_bios as _ } + } + + /// Set the number of discard bios. + pub fn set_num_discard_bios(&mut self, num: usize) { + // SAFETY: `self.opaque` is borrowed from foreign pointer, should be valid. + unsafe { (*self.opaque.get()).num_discard_bios = num as _ }; + } + + /// Set an error string for the target, could be used + /// by [`TargetOperations::ctr`]. + pub fn set_error(&mut self, err: &CStr) { + // SAFETY: `self.opaque` is borrowed from foreign pointer, should be valid. + unsafe { (*self.opaque.get()).error = err.as_char_ptr() as _ }; + } +} + +/// The return values of target map function, i.e., [`TargetOperations::map`]. +#[repr(u32)] +pub enum MapState { + /// The target will handle the io by resubmitting it later. + Submitted = bindings::DM_MAPIO_SUBMITTED, + + /// Simple remap complete. + Remapped = bindings::DM_MAPIO_REMAPPED, + + /// The target wants to requeue the io. + Requeue = bindings::DM_MAPIO_REQUEUE, + + /// The target wants to requeue the io after a delay. + DelayRequeue = bindings::DM_MAPIO_DELAY_REQUEUE, + + /// The target wants to complete the io. + Kill = bindings::DM_MAPIO_KILL, +} + +/// The return values of target end_io function. +#[repr(u32)] +pub enum EndState { + /// Ended successfully. + Done = bindings::DM_ENDIO_DONE, + + /// The io has still not completed (eg, multipath target might + /// want to requeue a failed io). 
+ Incomplete = bindings::DM_ENDIO_INCOMPLETE, + + /// The target wants to requeue the io. + Requeue = bindings::DM_ENDIO_REQUEUE, + + /// The target wants to requeue the io after a delay. + DelayRequeue = bindings::DM_ENDIO_DELAY_REQUEUE, +} + +/// A struct wraps `c_char` arguments, and yields `CStr`. +pub struct Args { + argc: core::ffi::c_uint, + argv: *mut *mut core::ffi::c_char, +} + +impl Args { + /// The caller should ensure that the number of valid `argv` pointers + /// should be `argc` exactly. + pub unsafe fn new(argc: core::ffi::c_uint, argv: *mut *mut core::ffi::c_char) -> Self { + Self { argc, argv } + } + + /// Returns the number of arguments. + pub fn len(&self) -> usize { + self.argc as _ + } + + /// Returns the `nth` (from zero) argument. + /// + /// If the index is out of bounds, return `None`. + pub fn get(&self, index: usize) -> Option<&CStr> { + if self.argc == 0 || index >= self.argc as _ { + None + } else { + // SAFETY: the `new` caller should ensure the number of valid `argv`. + unsafe { Some(CStr::from_char_ptr(*self.argv.add(index))) } + } + } +} + +impl Index for Args { + type Output = CStr; + + /// When using the indexing operator(`[]`), the caller should check the + /// length of [`Args`]. If the index is out of bounds, this will [`panic`]. + fn index(&self, index: usize) -> &Self::Output { + if self.argc == 0 || index >= self.argc as _ { + panic!( + "Index out of bounds: the actual length is {} but the index is {}.", + self.argc, index + ) + } else { + // SAFETY: the `new` caller should ensure the number of valid `argv`. + unsafe { CStr::from_char_ptr(*self.argv.add(index)) } + } + } +} diff --git a/linux/dm-sworndisk/src/lib.rs b/linux/dm-sworndisk/src/lib.rs index 40d0dd7..d6fd21e 100644 --- a/linux/dm-sworndisk/src/lib.rs +++ b/linux/dm-sworndisk/src/lib.rs @@ -3,36 +3,346 @@ //! Dummy dm-sworndisk module. 
-use kernel::prelude::*; +#![feature(allocator_api)] + +mod bio; +mod block_device; +mod device_mapper; + +use bindings::thread::{spawn, JoinHandle, Thread}; +use core::{ + ops::{Deref, DerefMut, Range}, + sync::atomic::{AtomicBool, Ordering}, +}; +use crossbeam_queue::SegQueue; +use kernel::{c_str, new_condvar, prelude::*, sync::CondVar}; +use sworndisk::{ + error::{self, Errno, Error}, + return_errno_with_msg, Arc, BlockId, BlockSet, BufMut, BufRef, Mutex, Vec, +}; + +use crate::bio::{Bio, BioOp, BioVec}; +use crate::block_device::{BlkStatus, BlockDevice, BLOCK_SECTORS, BLOCK_SIZE}; +use crate::device_mapper::{Args, EndState, MapState, Target, TargetOperations, TargetType}; module! { - type: Dummy, + type: TargetManager, name: "dm_sworndisk", author: "Rust for Linux Contributors", description: "Rust dm_sworndisk module", license: "GPL", } -struct Dummy; +/// A struct used to manage a `target_type`. +/// +/// Register the type to kernel when `init` the module, and `unregister` it +/// when `drop` (automatically). +struct TargetManager(Pin>); -impl kernel::Module for Dummy { +impl kernel::Module for TargetManager { fn init(_module: &'static ThisModule) -> Result { pr_info!("Rust dm_sworndisk module (init)\n"); - test_rwlock(); - test_weak(); - test_thread(); + // TODO: add a switch to enable those tests. + // test_rwlock(); + // test_weak(); + // test_thread(); - Ok(Dummy) + let sworndisk = Box::pin_init(TargetType::register::( + c_str!("sworndisk"), + [0, 0, 1], + 0, + ))?; + + Ok(TargetManager(sworndisk)) } } -impl Drop for Dummy { +impl Drop for TargetManager { fn drop(&mut self) { pr_info!("Rust dm_sworndisk module (exit)\n"); } } +/// A request queue, dispatching bios from device mapper to `RawDisk`. +struct ReqQueue { + bios: Mutex>, + // TODO: replace raw disk with sworndisk. + disk: RawDisk, + should_stop: AtomicBool, + new_bio: Pin>, +} + +impl ReqQueue { + /// Constructs a `ReqQueue`. 
+    pub fn new(disk: RawDisk) -> Self {
+        Self {
+            bios: Mutex::new(SegQueue::new()),
+            disk,
+            should_stop: AtomicBool::new(false),
+            // NOTE(review): this still panics if the condvar cannot be
+            // allocated; the constructor's signature has no way to report it.
+            new_bio: Box::pin_init(new_condvar!())
+                .expect("failed to allocate the request queue condvar"),
+        }
+    }
+
+    /// Returns true if the device mapper is going to exit.
+    fn should_stop(&self) -> bool {
+        self.should_stop.load(Ordering::Acquire)
+    }
+
+    /// Set the `should_stop` flag.
+    ///
+    /// If the device mapper target is going to exit, user should call this
+    /// method, in order to tell the `ReqQueue` handler thread to clear all
+    /// the pending bios.
+    pub fn set_stopped(&self) {
+        // The flag is published while holding the queue lock. The handler
+        // checks the flag and goes to sleep under the same lock, so it either
+        // observes the flag or is already waiting when the notification is
+        // sent; the wakeup cannot be lost.
+        let _guard = self.bios.lock();
+        self.should_stop.store(true, Ordering::Release);
+        self.new_bio.notify_all();
+    }
+
+    /// Enqueues a `Bio`.
+    ///
+    /// The bio is pushed under the queue lock and one waiter is notified
+    /// after the lock has been released.
+    pub fn enqueue(&self, bio: Bio) {
+        self.bios.lock().push(bio);
+        self.new_bio.notify_one();
+    }
+
+    /// A function to handle bio request.
+    ///
+    /// This should be used in a thread::spawn closure. It handles queued
+    /// bios one at a time until `set_stopped` is called, then handles
+    /// whatever is still queued and returns.
+    pub fn handler(queue: Arc<Self>) {
+        loop {
+            // Pick the next bio under the lock, but release the lock before
+            // doing the (blocking) I/O so that `enqueue` is never stalled
+            // behind a request in flight.
+            let next = {
+                let mut bios = queue.bios.lock();
+                loop {
+                    // Checked under the lock; see `set_stopped`.
+                    if queue.should_stop() {
+                        break None;
+                    }
+                    if let Some(bio) = bios.pop() {
+                        break Some(bio);
+                    }
+                    queue.new_bio.wait(&mut bios);
+                }
+            };
+            match next {
+                Some(bio) => queue.handle(bio),
+                None => break,
+            }
+        }
+        queue.clear();
+    }
+
+    /// Dispatches the `Bio` from device mapper to `RawDisk`.
+    fn handle(&self, bio: Bio) {
+        if bio.start_sector() % BLOCK_SECTORS != 0 || bio.len() % BLOCK_SIZE != 0 {
+            pr_warn!(
+                "bio not aligned to BLOCK_SIZE, start_sector: {}, len: {}\n",
+                bio.start_sector(),
+                bio.len(),
+            );
+            bio.end();
+            return;
+        }
+
+        // TODO: failures below are only logged; the bio is still ended without
+        // an error status because `Bio` exposes no way to set one.
+        match bio.op() {
+            BioOp::Read => {
+                let mut pos = bio.start_sector() / BLOCK_SECTORS;
+                for mut bio_vec in bio.iter() {
+                    let seg_len = bio_vec.len();
+                    // A segment that cannot be viewed as a block buffer is
+                    // reported instead of panicking the worker thread.
+                    let Ok(buf) = BufMut::try_from(bio_vec.deref_mut()) else {
+                        pr_warn!(
+                            "unsupported read segment, block_id: {}, len: {}\n",
+                            pos,
+                            seg_len,
+                        );
+                        break;
+                    };
+                    let nblocks = buf.nblocks();
+                    if let Err(err) = self.disk.read(pos, buf) {
+                        pr_info!(
+                            "read sworndisk failed, block_id: {}, nblocks: {}, err: {:?}\n",
+                            pos,
+                            nblocks,
+                            err,
+                        );
+                    }
+                    pos += nblocks;
+                }
+            }
+            BioOp::Write => {
+                let mut pos = bio.start_sector() / BLOCK_SECTORS;
+                for bio_vec in bio.iter() {
+                    let seg_len = bio_vec.len();
+                    let Ok(buf) = BufRef::try_from(bio_vec.deref()) else {
+                        pr_warn!(
+                            "unsupported write segment, block_id: {}, len: {}\n",
+                            pos,
+                            seg_len,
+                        );
+                        break;
+                    };
+                    let nblocks = buf.nblocks();
+                    if let Err(err) = self.disk.write(pos, buf) {
+                        pr_info!(
+                            "write sworndisk failed, block_id: {}, nblocks: {}, err: {:?}\n",
+                            pos,
+                            nblocks,
+                            err,
+                        );
+                    }
+                    pos += nblocks;
+                }
+            }
+            // Only reads and writes are expected here; anything else is
+            // logged and ended rather than panicking.
+            op => pr_warn!("unexpected bio operation in request queue: {:?}\n", op),
+        }
+        bio.end();
+    }
+
+    /// Handles all the pending `Bio`s in the queue.
+    fn clear(&self) {
+        loop {
+            // Pop in its own statement so the lock guard is dropped before
+            // the bio is handled.
+            let next = self.bios.lock().pop();
+            let Some(bio) = next else {
+                break;
+            };
+            self.handle(bio);
+        }
+    }
+}
+
+/// A struct represent a `dm_target` type, defines `TargetOperations`.
+struct DmSwornDisk {
+    /// Request queue shared with the handler thread.
+    queue: Arc<ReqQueue>,
+    /// Join handle of the handler thread; taken when the thread is joined.
+    handler: Mutex<Option<JoinHandle<()>>>,
+}
+
+// SAFETY: `DmSwornDisk` could be used from all threads.
+unsafe impl Sync for DmSwornDisk {}
+
+impl DmSwornDisk {
+    /// Returns an in-place initializer.
+    fn new(queue: Arc<ReqQueue>, handler: JoinHandle<()>) -> impl Init<Self> {
+        init!(Self {
+            queue,
+            handler: Mutex::new(Some(handler))
+        })
+    }
+}
+
+/// A struct for host block device, which impl `BlockSet`.
+struct RawDisk {
+    /// The host block device, shared between a disk and its subsets.
+    device: Arc<Mutex<BlockDevice>>,
+    /// The blocks of the device covered by this disk.
+    region: Range<BlockId>,
+}
+
+// SAFETY (NOTE(review): to be confirmed): the `BlockDevice` is only reached
+// through the `Mutex`, and `region` is plain data.
+unsafe impl Send for RawDisk {}
+unsafe impl Sync for RawDisk {}
+
+impl RawDisk {
+    /// Constructs a `RawDisk`.
+    ///
+    /// Opens the block device at `path` and covers the region that the
+    /// device reports through `BlockDevice::region`.
+    fn open(path: &CStr) -> Result<Self> {
+        let block_device = BlockDevice::open(path)?;
+        let region = block_device.region();
+        Ok(Self {
+            device: Arc::new(Mutex::new(block_device)),
+            region,
+        })
+    }
+
+    /// Converts `nblocks` blocks at the region-relative position `pos` into
+    /// a range of host device blocks.
+    ///
+    /// Fails with `Errno::InvalidArgs` and `msg` if the arithmetic overflows
+    /// or the range ends past `self.region.end`. `pos` is relative to
+    /// `self.region.start`, so the comparison is made on the translated end.
+    fn absolute_region(
+        &self,
+        pos: BlockId,
+        nblocks: usize,
+        msg: &'static str,
+    ) -> Result<Range<BlockId>, Error> {
+        let start = self.region.start.checked_add(pos);
+        let end = start.and_then(|start| start.checked_add(nblocks));
+        match (start, end) {
+            (Some(start), Some(end)) if end <= self.region.end => Ok(Range { start, end }),
+            _ => Err(Error::with_msg(Errno::InvalidArgs, msg)),
+        }
+    }
+}
+
+impl BlockSet for RawDisk {
+    /// Reads `buf.nblocks()` blocks starting at `pos` (relative to this
+    /// disk's region) from the host device into `buf`.
+    fn read(&self, pos: BlockId, buf: BufMut) -> Result<(), Error> {
+        let region = self.absolute_region(pos, buf.nblocks(), "read position is out of range")?;
+
+        // SAFETY: `buf` is owned by this call and outlives the synchronous
+        // submission below, so pointer and length describe live memory for
+        // as long as the bio uses them.
+        // NOTE(review): the pointer is taken from a shared slice although
+        // the device writes through it; confirm against the contract of
+        // `BioVec::from_ptr`.
+        let bio_vec =
+            unsafe { BioVec::from_ptr(buf.as_slice().as_ptr() as _, 0, buf.as_slice().len()) };
+
+        let device = self.device.lock();
+        let bio = Bio::alloc(&device, BioOp::Read, region, &[bio_vec])
+            .map_err(|_| Error::with_msg(Errno::OutOfMemory, "alloc read bio failed"))?;
+        bio.submit_sync()
+            .map_err(|_| Error::with_msg(Errno::IoFailed, "read raw disk failed"))
+    }
+
+    /// Writes the `buf.nblocks()` blocks in `buf` to the host device
+    /// starting at `pos` (relative to this disk's region).
+    fn write(&self, pos: BlockId, buf: BufRef) -> Result<(), Error> {
+        let region = self.absolute_region(pos, buf.nblocks(), "write position is out of range")?;
+
+        // SAFETY: `buf` is owned by this call and outlives the synchronous
+        // submission below, so pointer and length describe live memory for
+        // as long as the bio uses them.
+        let bio_vec =
+            unsafe { BioVec::from_ptr(buf.as_slice().as_ptr() as _, 0, buf.as_slice().len()) };
+
+        let device = self.device.lock();
+        let bio = Bio::alloc(&device, BioOp::Write, region, &[bio_vec])
+            .map_err(|_| Error::with_msg(Errno::OutOfMemory, "alloc write bio failed"))?;
+        bio.submit_sync()
+            .map_err(|_| Error::with_msg(Errno::IoFailed, "write raw disk failed"))
+    }
+
+    fn flush(&self) -> Result<(), Error> {
+        // TODO: issue a flush bio to host device.
+        Ok(())
+    }
+
+    /// Returns a `RawDisk` over `range`, interpreted relative to the start
+    /// of this disk's region and sharing the same device handle.
+    ///
+    /// Fails with `Errno::InvalidArgs` if `range` is inverted or does not
+    /// fit inside this disk's region.
+    fn subset(&self, range: Range<BlockId>) -> Result<Self, Error> {
+        let out_of_range = range.start > range.end
+            || self
+                .region
+                .start
+                .checked_add(range.end)
+                .map_or(true, |end| end > self.region.end);
+        if out_of_range {
+            return_errno_with_msg!(Errno::InvalidArgs, "subset is out of range");
+        }
+
+        // `range.start <= range.end` and the end was checked above, so
+        // neither addition can overflow.
+        Ok(RawDisk {
+            device: self.device.clone(),
+            region: Range {
+                start: self.region.start + range.start,
+                end: self.region.start + range.end,
+            },
+        })
+    }
+
+    /// Number of blocks covered by this disk's region.
+    fn nblocks(&self) -> usize {
+        self.region.len()
+    }
+}
+
+#[vtable]
+impl TargetOperations for DmSwornDisk {
+    type Private = DmSwornDisk;
+
+    /// Constructs the target: opens the device named by the single
+    /// argument, creates the request queue and spawns its handler thread.
+    fn ctr(target: &mut Target<Self>, args: Args) -> Result<Box<DmSwornDisk>> {
+        // TODO: accept more arguments, e.g., root key.
+        if args.len() != 1 {
+            target.set_error(c_str!("Invalid argument count"));
+            return Err(EINVAL);
+        }
+
+        let Ok(raw_disk) = RawDisk::open(&args[0]) else {
+            target.set_error(c_str!("Device lookup failed"));
+            return Err(ENODEV);
+        };
+
+        // TODO: use raw_disk to construct a sworndisk instance.
+        let queue = Arc::new(ReqQueue::new(raw_disk));
+        let queue_cloned = queue.clone();
+        let handler = spawn(move || {
+            ReqQueue::handler(queue_cloned);
+        });
+
+        // If the private data cannot be initialized, `dtr` never runs for
+        // this target, so ask the already running handler thread to stop
+        // here; otherwise it would sleep on the queue forever.
+        let stopper = queue.clone();
+        Box::init(DmSwornDisk::new(queue, handler)).map_err(|err| {
+            stopper.set_stopped();
+            err
+        })
+    }
+
+    /// Destroys the target: stops the request queue and waits for the
+    /// handler thread to finish.
+    fn dtr(target: &mut Target<Self>) {
+        let Some(sworndisk) = target.private() else {
+            pr_warn!("Error, found no dm_sworndisk\n");
+            return;
+        };
+
+        sworndisk.queue.set_stopped();
+        // The handle is taken exactly once; a second teardown finds `None`
+        // and must not panic.
+        let Some(handler) = sworndisk.handler.lock().take() else {
+            pr_warn!("Error, dm_sworndisk handler already joined\n");
+            return;
+        };
+        // NOTE(review): this unwrap still panics if joining fails; the
+        // return type of `JoinHandle::join` is not visible from here.
+        handler.join().unwrap();
+    }
+
+    /// Queues reads and writes for the handler thread; every other
+    /// operation is logged and the bio is killed.
+    fn map(target: &Target<Self>, bio: Bio) -> MapState {
+        let Some(sworndisk) = target.private() else {
+            pr_warn!("Error, found no dm_sworndisk\n");
+            return MapState::Kill;
+        };
+
+        match bio.op() {
+            BioOp::Read | BioOp::Write => {
+                sworndisk.queue.enqueue(bio);
+                MapState::Submitted
+            }
+            BioOp::Flush => {
+                pr_info!("flush unsupported\n");
+                MapState::Kill
+            }
+            BioOp::Discard => {
+                pr_info!("discard unsupported\n");
+                MapState::Kill
+            }
+            BioOp::Undefined => {
+                pr_info!("undefined operations\n");
+                MapState::Kill
+            }
+        }
+    }
+}
+
 fn test_rwlock() {
     use bindings::new_rwlock;