diff --git a/src/flatten.rs b/src/flatten.rs new file mode 100644 index 0000000..67fcc85 --- /dev/null +++ b/src/flatten.rs @@ -0,0 +1,409 @@ +//! Flat representation of regions. + +use crate::{FlatStack, Iter, Region}; +use std::marker::PhantomData; +use std::ops::Deref; + +/// TODO +pub trait FlatWrite { + /// TODO + fn write_lengthened(&mut self, data: &[T]) -> std::io::Result<()>; + /// TODO + fn write_unit(&mut self, unit: &T) -> std::io::Result<()>; + + /// TODO + fn lengthened_size(data: &[T], offset: &mut usize); + /// TODO + fn unit_size(unit: &T, offset: &mut usize); +} + +/// TODO +pub struct DefaultFlatWrite { + inner: W, + offset: usize, + alignment: usize, +} + +/// TODO +const ALIGNMENT: usize = 64; + +impl DefaultFlatWrite { + const NULLS: [u8; ALIGNMENT - 1] = [0; ALIGNMENT - 1]; + + /// TODO + pub fn new(inner: W) -> Self { + Self { + inner, + offset: 0, + alignment: 0, + } + } + + fn pad(&mut self) -> std::io::Result<()> { + let padding = (self.offset as *const u8).align_offset(std::mem::align_of::()); + self.alignment = std::cmp::max(self.alignment, std::mem::align_of::()); + self.inner.write_all(&Self::NULLS[..padding])?; + self.offset += padding; + Ok(()) + } + + fn pad_size(offset: &mut usize) { + *offset += (*offset as *const u8).align_offset(std::mem::align_of::()); + } + + /// TODO + pub fn finish(mut self) -> std::io::Result<()> { + let alignment: u8 = self + .alignment + .next_power_of_two() + .trailing_zeros() + .try_into() + .unwrap(); + self.write_unit(&alignment) + } + + /// TODO + pub fn finish_size(offset: &mut usize) { + Self::unit_size(&0u8, offset); + } +} + +impl FlatWrite for DefaultFlatWrite { + fn write_lengthened(&mut self, data: &[T]) -> std::io::Result<()> { + println!( + "write_lengthened data len: {}*{}", + data.len(), + std::mem::size_of::() + ); + self.write_unit(&data.len())?; + self.pad::()?; + let data: &[u8] = unsafe { + std::slice::from_raw_parts(data.as_ptr().cast(), std::mem::size_of_val(data)) + }; + println!( + "write_lengthened data len: {}*{}", + data.len(), + std::mem::size_of::() + ); + self.inner.write_all(data)?; + self.offset += data.len(); + Ok(()) + } + + fn write_unit(&mut self, unit: &T) -> std::io::Result<()> { + self.pad::()?; + let slice = std::slice::from_ref(unit); + let bytes = unsafe { + std::slice::from_raw_parts(slice.as_ptr() as *const u8, std::mem::size_of_val(slice)) + }; + self.inner.write_all(bytes)?; + self.offset += bytes.len(); + Ok(()) + } + + fn lengthened_size(data: &[T], offset: &mut usize) { + Self::unit_size(&data.len(), offset); + Self::pad_size::(offset); + let data: &[u8] = unsafe { + std::slice::from_raw_parts(data.as_ptr().cast(), std::mem::size_of_val(data)) + }; + *offset += data.len(); + } + + fn unit_size(unit: &T, offset: &mut usize) { + Self::pad_size::(offset); + let slice = std::slice::from_ref(unit); + let bytes = unsafe { + std::slice::from_raw_parts(slice.as_ptr() as *const u8, std::mem::size_of_val(slice)) + }; + *offset += bytes.len(); + } +} + +/// TODO +#[derive(Clone, Copy, Debug, Default)] +pub struct DerefWrapper(pub S); + +impl Deref for DerefWrapper> +where + S: Deref, +{ + type Target = [u8]; + + fn deref(&self) -> &Self::Target { + self.0.deref().deref() + } +} + +impl Deref for DerefWrapper> +where + S: Deref, +{ + type Target = [u8]; + + fn deref(&self) -> &Self::Target { + self.0.deref().deref() + } +} + +/// TODO +pub struct Bytes { + buffer: S, + start: usize, + end: usize, +} + +impl Bytes +where + S: Deref + Clone, +{ + /// TODO + pub fn new_aligned(buffer: S, start: usize, end: usize) -> Self { + if end - start > 1 { + println!("asdf"); + let alignment = 1 << Bytes::new(&buffer.deref()[end - 1..], 0, 1).read_unit::(); + println!("alignment: {alignment}"); + let offset = buffer.deref()[start..].as_ptr().align_offset(alignment); + assert_eq!( + offset, + 0, + "Unaliged memory: {:?} off by {} bytes", + buffer.deref().as_ptr(), + offset + ); + } + Self { buffer, start, end } + } + + /// TODO + pub fn new(buffer: S, start: usize, end: usize) -> Self { + Self { buffer, start, end } + } + + /// TODO + pub fn read_lengthened(&mut self) -> TypedBytes { + let len = self.read_unit::(); + let (head, _data, _tail) = unsafe { self.buffer[self.start..].align_to::() }; + let end = self.start + head.len() + len * std::mem::size_of::(); + let bytes = Self::new(self.buffer.clone(), self.start + head.len(), end); + self.start = end; + TypedBytes { + bytes, + _marker: PhantomData, + } + } + + /// TODO + pub fn read_unit(&mut self) -> T { + let (head, data, _tail) = unsafe { self.buffer[self.start..].align_to::() }; + self.start += head.len() + std::mem::size_of::(); + data[0] + } + + /// Call `callback` with `size`, `capacity` for each allocation. + pub fn heap_size(&self, mut callback: F) { + callback(self.end - self.start, self.buffer.len()); + } +} + +impl Deref for Bytes +where + S: Deref, +{ + type Target = [u8]; + + fn deref(&self) -> &Self::Target { + &self.buffer[self.start..self.end] + } +} + +/// TODO +pub struct TypedBytes { + pub(crate) bytes: Bytes, + _marker: PhantomData, +} + +impl Default for TypedBytes +where + S: Default + Deref + Clone, +{ + fn default() -> Self { + Self { + bytes: Bytes::new(S::default(), 0, 0), + _marker: PhantomData, + } + } +} + +impl Deref for TypedBytes +where + S: Deref, +{ + type Target = [T]; + + fn deref(&self) -> &Self::Target { + let (head, data, _tail) = unsafe { self.bytes.deref().align_to::() }; + assert_eq!(head.len(), 0, "Unaligned memory"); + data + } +} + +/// TODO +pub trait Entomb { + /// TODO + fn entomb(&self, write: &mut W) -> std::io::Result<()>; + + /// TODO + fn flat_size(&self, offset: &mut usize); +} + +/// TODO +pub trait Exhume { + /// TODO + type Flat: Region; // where S: Deref + Clone + Default; + + /// TODO + fn exhume(buffer: &mut Bytes) -> std::io::Result + where + S: Deref + Clone + Default; +} + +impl FlatStack +where + R: Region + Entomb, +{ + /// TODO + pub fn entomb(&self, write: &mut W) -> std::io::Result<()> { + write.write_lengthened(&self.indices)?; + self.region.entomb(write) + } + + /// TODO + pub fn flat_size(&self, offset: &mut usize) { + W::lengthened_size(&self.indices, offset); + self.region.flat_size::(offset); + } +} + +impl FlatStack +where + R: Region, +{ + /// TODO + pub fn exhume(buffer: &mut Bytes) -> std::io::Result> + where + S: Deref + Clone + Default, + R: Exhume, + { + let indices = buffer.read_lengthened(); + let region = R::exhume(buffer)?; + Ok(ZeroCopyFlatStack { indices, region }) + } +} + +/// TODO +pub struct ZeroCopyFlatStack +where + R: Region, +{ + indices: TypedBytes, + region: R, +} + +impl ZeroCopyFlatStack +where + S: Deref, + R: Region, +{ + /// TODO + pub fn iter(&self) -> Iter { + self.into_iter() + } +} + +impl<'a, S, R> IntoIterator for &'a ZeroCopyFlatStack +where + S: Deref, + R: Region, +{ + type Item = R::ReadItem<'a>; + type IntoIter = Iter<'a, R>; + + fn into_iter(self) -> Self::IntoIter { + Iter { + inner: self.indices.deref().iter(), + region: &self.region, + } + } +} + +#[cfg(test)] +mod tests { + use crate::flatten::{Bytes, DefaultFlatWrite, DerefWrapper, Entomb, Exhume, ALIGNMENT}; + use crate::{OwnedRegion, Push, Region, StringRegion}; + use std::io::Write; + use std::rc::Rc; + + #[test] + fn test_flatten_slice() { + let mut buffer = Vec::new(); + let mut write = DefaultFlatWrite::new(&mut buffer); + + let mut region = OwnedRegion::default(); + let index = region.push("abc".as_bytes()); + + region.entomb(&mut write).unwrap(); + write.finish().unwrap(); + + println!("{:?}", buffer); + let end = buffer.len(); + + let mut read = Bytes::new_aligned(&buffer[..], 0, end); + + let flat = OwnedRegion::::exhume(&mut read).unwrap(); + assert_eq!("abc".as_bytes(), flat.index(index)); + } + + #[test] + fn test_flatten_string() { + let mut buffer = Vec::new(); + let mut write = DefaultFlatWrite::new(&mut buffer); + + let mut region = ::default(); + let index = region.push("abc"); + let index2 = region.push("defghij"); + + let mut other_region = OwnedRegion::default(); + let other_index = other_region.push([0x11223344566778899u128; 16]); + + let mut offset = 0; + region.flat_size::>>(&mut offset); + other_region.flat_size::>>(&mut offset); + >>::finish_size(&mut offset); + + region.entomb(&mut write).unwrap(); + other_region.entomb(&mut write).unwrap(); + write.finish().unwrap(); + + assert_eq!(offset, buffer.len()); + + let mut aligned_buffer = vec![0u8; buffer.len() + ALIGNMENT]; + let offset = aligned_buffer.as_ptr().align_offset(ALIGNMENT); + println!("aligning to offset {offset}"); + (&mut aligned_buffer[offset..]) + .write_all(&buffer[..]) + .unwrap(); + + println!("{:?}", buffer); + + let end = buffer.len(); + let mut read = + Bytes::new_aligned(DerefWrapper(Rc::new(aligned_buffer)), offset, end + offset); + + let flat = ::exhume(&mut read).unwrap(); + assert_eq!("abc", flat.index(index)); + assert_eq!("defghij", flat.index(index2)); + let other_flat = OwnedRegion::::exhume(&mut read).unwrap(); + assert_eq!(other_flat.index(other_index), [0x11223344566778899u128; 16]); + } +} diff --git a/src/impls/slice_copy.rs b/src/impls/slice_copy.rs index 6229dfc..c9a9921 100644 --- a/src/impls/slice_copy.rs +++ b/src/impls/slice_copy.rs @@ -222,6 +222,91 @@ where } } +mod flatten { + use crate::flatten::{Bytes, Entomb, Exhume, FlatWrite, TypedBytes}; + use crate::{OwnedRegion, Region}; + use std::ops::Deref; + + impl Entomb for OwnedRegion { + fn entomb(&self, write: &mut W) -> std::io::Result<()> { + write.write_lengthened(&self.slices) + } + + fn flat_size(&self, offset: &mut usize) { + W::lengthened_size(&self.slices, offset); + } + } + + impl> Exhume for OwnedRegion { + type Flat = BorrowedRegion; + fn exhume(bytes: &mut Bytes) -> std::io::Result { + Ok(BorrowedRegion { + bytes: bytes.read_lengthened(), + }) + } + } + + /// TODO + pub struct BorrowedRegion { + bytes: TypedBytes, + } + + impl Default for BorrowedRegion + where + S: Default + Deref + Clone, + { + fn default() -> Self { + Self { + bytes: TypedBytes::default(), + } + } + } + + impl Region for BorrowedRegion + where + S: Deref + Default + Clone, + T: Copy + 'static, + { + type Owned = <[T] as ToOwned>::Owned; + type ReadItem<'a> = &'a [T] where Self: 'a; + type Index = (usize, usize); + + fn merge_regions<'a>(_regions: impl Iterator + Clone) -> Self + where + Self: 'a, + { + todo!() + } + + #[inline] + fn index(&self, (start, end): Self::Index) -> Self::ReadItem<'_> { + &self.bytes.deref()[start..end] + } + + fn reserve_regions<'a, I>(&mut self, _regions: I) + where + Self: 'a, + I: Iterator + Clone, + { + todo!() + } + + fn clear(&mut self) { + todo!() + } + + fn heap_size(&self, callback: F) { + self.bytes.bytes.heap_size(callback); + } + + fn reborrow<'b, 'a: 'b>(item: Self::ReadItem<'a>) -> Self::ReadItem<'b> + where + Self: 'a, + { + item + } + } +} #[cfg(test)] mod tests { use crate::{CopyIter, Push, Region, ReserveItems}; diff --git a/src/impls/string.rs b/src/impls/string.rs index 1403b88..bcbb1bc 100644 --- a/src/impls/string.rs +++ b/src/impls/string.rs @@ -32,10 +32,7 @@ use crate::{Containerized, Push, Region, ReserveItems}; /// ``` #[derive(Default, Debug, Clone)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -pub struct StringRegion> -where - for<'a> R: Region = &'a [u8]> + 'a, -{ +pub struct StringRegion> { inner: R, } @@ -91,6 +88,37 @@ where } } +mod flatten { + use crate::flatten::{Bytes, Entomb, Exhume, FlatWrite}; + use crate::{Region, StringRegion}; + use std::ops::Deref; + + impl Entomb for StringRegion { + fn entomb(&self, write: &mut W) -> std::io::Result<()> { + self.inner.entomb(write) + } + + fn flat_size(&self, offset: &mut usize) { + self.inner.flat_size::(offset) + } + } + + impl Exhume for StringRegion + where + // for<'a> R: ReadRegion = &'a [u8]> + 'a, + // for<'a, 'b> R::Flat<'a>: ReadRegion = &'b [u8]> + 'b, + S: Clone + Default + Deref, + for<'a> R: Exhume + Region = &'a [u8]> + 'a, + for<'a> R::Flat: Region = &'a [u8]> + 'a, + { + type Flat = StringRegion; + + fn exhume(buffer: &mut Bytes) -> std::io::Result { + R::exhume(buffer).map(|inner| StringRegion { inner }) + } + } +} + impl Containerized for String { type Region = StringRegion; } diff --git a/src/lib.rs b/src/lib.rs index fbd7bbc..833ca9d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -7,6 +7,7 @@ use std::fmt::{Debug, Formatter}; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; +pub mod flatten; pub mod impls; pub use impls::columns::ColumnsRegion;