Skip to content

Commit

Permalink
Add MaybeUninit type
Browse files Browse the repository at this point in the history
The standard library's `MaybeUninit` type does not currently support
wrapping unsized types. This commit introduces a polyfill with the same
behavior as `MaybeUninit` which does support wrapping unsized types.

In this commit, the only supported types are sized types and slice
types. Later (as part of #29), we will add the ability to derive the
`AsMaybeUninit` trait, which will extend support to custom DSTs.

TODO: Figure out how to get rid of KnownLayout<MaybeUninit =
mem::MaybeUninit<T>> bounds.

Makes progress on #29
  • Loading branch information
joshlf committed Sep 6, 2023
1 parent 6de0dfd commit 7ddaeb4
Show file tree
Hide file tree
Showing 3 changed files with 482 additions and 15 deletions.
198 changes: 187 additions & 11 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ use core::{
fmt::{self, Debug, Display, Formatter},
hash::Hasher,
marker::PhantomData,
mem::{self, ManuallyDrop, MaybeUninit},
mem::{self, ManuallyDrop},
num::{
NonZeroI128, NonZeroI16, NonZeroI32, NonZeroI64, NonZeroI8, NonZeroIsize, NonZeroU128,
NonZeroU16, NonZeroU32, NonZeroU64, NonZeroU8, NonZeroUsize, Wrapping,
Expand Down Expand Up @@ -229,6 +229,31 @@ pub unsafe trait KnownLayout: sealed::KnownLayoutSealed {
#[doc(hidden)]
const TRAILING_SLICE_ELEM_SIZE: Option<usize>;

/// A type which has the same layout as `Self`, but which has no validity
/// constraints.
///
/// Roughly speaking, this type is equivalent to what the standard library's
/// [`MaybeUninit<Self>`] would be if it supported unsized types.
///
/// # Safety
///
/// For `T: KnownLayout`, the following must hold:
/// - Given `m: T::MaybeUninit`, it is sound to write any byte value,
/// including an uninitialized byte, at any byte offset in `m`
/// - `T` and `T::MaybeUninit` have the same alignment requirement
/// - It is valid to use an `as` cast to convert a `t: *const T` to a `m:
/// *const T::MaybeUninit` and vice-versa (and likewise for `*mut T`/`*mut
/// T::MaybeUninit`). Regardless of which direction the conversion was
/// performed, the sizes of the pointers' referents are always equal (in
/// terms of an API which is not yet stable, `size_of_val_raw(t) ==
/// size_of_val_raw(m)`).
/// - `T::MaybeUninit` contains [`UnsafeCell`]s at exactly the same byte
/// ranges that `T` does.
///
/// [`MaybeUninit<Self>`]: core::mem::MaybeUninit
/// [`UnsafeCell`]: core::cell::UnsafeCell
type MaybeUninit: ?Sized + KnownLayout;

/// Validates that the memory region at `addr` of length `bytes_len`
/// satisfies `Self`'s size and alignment requirements, returning `(elems,
/// split_at, prefix_suffix_bytes)`.
Expand Down Expand Up @@ -302,6 +327,24 @@ pub unsafe trait KnownLayout: sealed::KnownLayoutSealed {
/// elements in its trailing slice.
#[doc(hidden)]
fn raw_from_ptr_len(bytes: NonNull<u8>, elems: usize) -> NonNull<Self>;

/// Converts a pointer to `Self::MaybeUninit` into a pointer to `Self`.
///
/// Only the pointee type changes; see the guarantee below.
///
/// # Safety
///
/// Callers may assume that the memory region addressed by the return value
/// is the same as that addressed by the argument, and that both the return
/// value and the argument have the same provenance.
fn cast_from_maybe_uninit(maybe_uninit: NonNull<Self::MaybeUninit>) -> NonNull<Self>;

/// Converts a pointer to `Self` into a pointer to `Self::MaybeUninit`.
///
/// Only the pointee type changes; see the guarantee below.
///
/// # Safety
///
/// Callers may assume that the memory region addressed by the return value
/// is the same as that addressed by the argument, and that both the return
/// value and the argument have the same provenance.
fn cast_to_maybe_uninit(slf: NonNull<Self>) -> NonNull<Self::MaybeUninit>;
}

impl<T: KnownLayout> sealed::KnownLayoutSealed for [T] {}
Expand All @@ -321,6 +364,22 @@ unsafe impl<T: KnownLayout> KnownLayout for [T] {
};
const TRAILING_SLICE_ELEM_SIZE: Option<usize> = Some(mem::size_of::<T>());

// SAFETY:
// - `MaybeUninit` has no bit validity requirements and `[U]` has the same
// bit validity requirements as `U`, so `[MaybeUninit<T>]` has no bit
// validity requirements. Thus, it is sound to write any byte value,
// including an uninitialized byte, at any byte offset.
// - Since `MaybeUninit<T>` has the same layout as `T`, and `[U]` has the
// same alignment as `U`, `[MaybeUninit<T>]` has the same alignment as
// `[T]`.
// - `[T]` and `[MaybeUninit<T>]` are both slice types, and so pointers can
// be converted using an `as` cast. Since `T` and `MaybeUninit<T>` have
// the same size, and since such a cast preserves the number of elements
// in the slice, the referent slices themselves will have the same size.
// - `[MaybeUninit<T>]` has the same field offsets as `[T]`, and so it
// contains `UnsafeCell`s at exactly the same byte ranges as `[T]`.
type MaybeUninit = [mem::MaybeUninit<T>];

// SAFETY: `.cast` preserves address and provenance. The returned pointer
// refers to an object with `elems` elements by construction.
#[inline(always)]
Expand All @@ -329,6 +388,20 @@ unsafe impl<T: KnownLayout> KnownLayout for [T] {
#[allow(unstable_name_collisions)]
NonNull::slice_from_raw_parts(data.cast::<T>(), elems)
}

// SAFETY: `.cast` preserves pointer address and provenance. The returned
// slice pointer is rebuilt with the element count of the argument.
fn cast_from_maybe_uninit(maybe_uninit: NonNull<[mem::MaybeUninit<T>]>) -> NonNull<[T]> {
    // Retarget the data pointer at `T`, then read the element count off the
    // original slice pointer.
    let data = maybe_uninit.cast::<T>();
    let elems = maybe_uninit.len();
    // TODO(#67): Remove this allow. See NonNullExt for more details.
    #[allow(unstable_name_collisions)]
    NonNull::slice_from_raw_parts(data, elems)
}

// SAFETY: `.cast` preserves pointer address and provenance. The returned
// slice pointer is rebuilt with the element count of the argument.
fn cast_to_maybe_uninit(slf: NonNull<[T]>) -> NonNull<[mem::MaybeUninit<T>]> {
    // Retarget the data pointer at `MaybeUninit<T>`, then read the element
    // count off the original slice pointer.
    let data = slf.cast::<mem::MaybeUninit<T>>();
    let elems = slf.len();
    // TODO(#67): Remove this allow. See NonNullExt for more details.
    #[allow(unstable_name_collisions)]
    NonNull::slice_from_raw_parts(data, elems)
}
}

/// Implements `KnownLayout` for a sized type.
Expand Down Expand Up @@ -365,11 +438,36 @@ macro_rules! impl_known_layout {
// `T` is sized so it has no trailing slice.
const TRAILING_SLICE_ELEM_SIZE: Option<usize> = None;

// SAFETY:
// - `MaybeUninit` has no validity requirements, so it is sound to
// write any byte value, including an uninitialized byte, at any
// offset.
// - `MaybeUninit<T>` has the same layout as `T`, so they have the
// same alignment requirement. For the same reason, their sizes
// are equal.
// - Since both types are sized, raw pointers to both types are
// thin pointers, and thus can be converted using `as` casts.
// Since their sizes are equal, the sizes of these pointers'
// referents are always equal.
// - `MaybeUninit<T>` has the same field offsets as `T`, and so it
// contains `UnsafeCell`s at exactly the same byte ranges as `T`.
type MaybeUninit = mem::MaybeUninit<$ty>;

// SAFETY: `.cast` preserves address and provenance.
#[inline(always)]
fn raw_from_ptr_len(bytes: NonNull<u8>, _elems: usize) -> NonNull<Self> {
bytes.cast::<Self>()
}

// SAFETY: `.cast` preserves pointer address and provenance.
fn cast_from_maybe_uninit(maybe_uninit: NonNull<Self::MaybeUninit>) -> NonNull<Self> {
maybe_uninit.cast::<Self>()
}

// SAFETY: `.cast` preserves pointer address and provenance.
fn cast_to_maybe_uninit(slf: NonNull<Self>) -> NonNull<Self::MaybeUninit> {
slf.cast::<Self::MaybeUninit>()
}
}
};
}
Expand All @@ -385,16 +483,85 @@ impl_known_layout!(
impl_known_layout!(T => Option<T>);
impl_known_layout!(T: ?Sized => PhantomData<T>);
impl_known_layout!(T => Wrapping<T>);
impl_known_layout!(T => MaybeUninit<T>);
impl_known_layout!(T => mem::MaybeUninit<T>);
impl_known_layout!(const N: usize, T => [T; N]);

safety_comment! {
/// SAFETY:
/// `str` and `ManuallyDrop<[T]>` have the same representations as `[u8]`
/// and `[T]` respectively. `str` has different bit validity than `[u8]`,
/// but that doesn't affect the soundness of this impl.
/// and `[T]` respectively, including with respect to the locations of
/// `UnsafeCell`s. `str` has different bit validity than `[u8]`, but that
/// doesn't affect the soundness of this impl.
unsafe_impl_known_layout!(#[repr([u8])] str);
unsafe_impl_known_layout!(T: ?Sized + KnownLayout => #[repr(T)] ManuallyDrop<T>);
/// SAFETY:
/// `Cell<T>` and `UnsafeCell<T>` have the same representations, including
/// (trivially) with respect to the locations of `UnsafeCell`s.
unsafe_impl_known_layout!(T: ?Sized + KnownLayout => #[repr(cell::UnsafeCell<T>)] cell::Cell<T>);
}

impl<T: ?Sized + sealed::KnownLayoutSealed> sealed::KnownLayoutSealed for cell::UnsafeCell<T> {}
// Implements `KnownLayout` for `UnsafeCell<T>` by delegating every layout
// constant and every pointer conversion to `T`'s own `KnownLayout` impl.
//
// SAFETY: See inline comments.
unsafe impl<T: ?Sized + KnownLayout> KnownLayout for cell::UnsafeCell<T> {
// SAFETY: `UnsafeCell<T>` and `T` have the same size, alignment, and
// trailing element size.
const FIXED_PREFIX_SIZE: usize = <T as KnownLayout>::FIXED_PREFIX_SIZE;
const ALIGN: NonZeroUsize = <T as KnownLayout>::ALIGN;
const TRAILING_SLICE_ELEM_SIZE: Option<usize> = <T as KnownLayout>::TRAILING_SLICE_ELEM_SIZE;

// SAFETY:
// - By `MaybeUninit` invariant, it is sound to write any byte - including
// an uninitialized byte - at any byte offset in
// `UnsafeCell<T::MaybeUninit>`.
// - `UnsafeCell<T>` and `T` have the same size, alignment, and trailing
// element size. Also, by `MaybeUninit` invariants:
// - `T` and `T::MaybeUninit` have the same alignment.
// - It is valid to cast `*const T` to `*const T::MaybeUninit` and
// vice-versa (and likewise for `*mut`), and these operations preserve
// pointer referent size.
//
// Thus, these properties hold between `UnsafeCell<T>` and
// `UnsafeCell<T::MaybeUninit>`.
// - `UnsafeCell<T>` and `UnsafeCell<T::MaybeUninit>` trivially have
// `UnsafeCell`s in exactly the same locations.
type MaybeUninit = cell::UnsafeCell<<T as KnownLayout>::MaybeUninit>;

// SAFETY: All operations preserve address and provenance. As noted above,
// `UnsafeCell<T>` and `T` have the same size, so the `as` cast preserves
// referent size.
#[inline(always)]
fn raw_from_ptr_len(bytes: NonNull<u8>, elems: usize) -> NonNull<Self> {
// Build the pointer to `T` first, then change only the pointee type.
let slf = T::raw_from_ptr_len(bytes, elems).as_ptr();
// `T` may be unsized (`T: ?Sized`), so an `as` cast is used rather than
// `NonNull::cast`, which only targets sized types.
#[allow(clippy::as_conversions)]
let slf = slf as *mut cell::UnsafeCell<T>;
// SAFETY: `.as_ptr()` called on a non-null pointer.
unsafe { NonNull::new_unchecked(slf) }
}

// SAFETY: All operations preserve pointer address and provenance.
fn cast_from_maybe_uninit(maybe_uninit: NonNull<Self::MaybeUninit>) -> NonNull<Self> {
// Step 1: view the `UnsafeCell<T::MaybeUninit>` pointer as a pointer to
// the inner `T::MaybeUninit`.
#[allow(clippy::as_conversions)]
let maybe_uninit = maybe_uninit.as_ptr() as *mut <T as KnownLayout>::MaybeUninit;
// SAFETY: `.as_ptr()` called on a non-null pointer.
let maybe_uninit = unsafe { NonNull::new_unchecked(maybe_uninit) };
// Step 2: let `T` convert `T::MaybeUninit` -> `T`.
let repr = <T as KnownLayout>::cast_from_maybe_uninit(maybe_uninit).as_ptr();
// Step 3: view the pointer to `T` as a pointer to `UnsafeCell<T>`.
#[allow(clippy::as_conversions)]
let slf = repr as *mut Self;
// SAFETY: `.as_ptr()` called on non-null pointer.
unsafe { NonNull::new_unchecked(slf) }
}

// SAFETY: All operations preserve pointer address and provenance.
fn cast_to_maybe_uninit(slf: NonNull<Self>) -> NonNull<Self::MaybeUninit> {
// Step 1: view the `UnsafeCell<T>` pointer as a pointer to the inner `T`.
#[allow(clippy::as_conversions)]
let repr = slf.as_ptr() as *mut T;
// SAFETY: `.as_ptr()` called on non-null pointer.
let repr = unsafe { NonNull::new_unchecked(repr) };
// Step 2: let `T` convert `T` -> `T::MaybeUninit`.
let maybe_uninit = <T as KnownLayout>::cast_to_maybe_uninit(repr).as_ptr();
// Step 3: view the pointer to `T::MaybeUninit` as a pointer to
// `UnsafeCell<T::MaybeUninit>`.
#[allow(clippy::as_conversions)]
let maybe_uninit = maybe_uninit as *mut cell::UnsafeCell<T::MaybeUninit>;
// SAFETY: `.as_ptr()` called on non-null pointer.
unsafe { NonNull::new_unchecked(maybe_uninit) }
}
}

/// Types for which a sequence of bytes all set to zero represents a valid
Expand Down Expand Up @@ -1201,17 +1368,16 @@ safety_comment! {
/// - `Unaligned`: `MaybeUninit<T>` is guaranteed by its documentation [1]
/// to have the same alignment as `T`.
///
/// [1]
/// https://doc.rust-lang.org/nightly/core/mem/union.MaybeUninit.html#layout-1
/// [1] https://doc.rust-lang.org/nightly/core/mem/union.MaybeUninit.html#layout-1
///
/// TODO(https://github.com/google/zerocopy/issues/251): If we split
/// `FromBytes` and `RefFromBytes`, or if we introduce a separate
/// `NoCell`/`Freeze` trait, we can relax the trait bounds for `FromZeroes`
/// and `FromBytes`.
unsafe_impl!(T: FromZeroes => FromZeroes for MaybeUninit<T>);
unsafe_impl!(T: FromBytes => FromBytes for MaybeUninit<T>);
unsafe_impl!(T: Unaligned => Unaligned for MaybeUninit<T>);
assert_unaligned!(MaybeUninit<()>, MaybeUninit<u8>);
unsafe_impl!(T: FromZeroes => FromZeroes for mem::MaybeUninit<T>);
unsafe_impl!(T: FromBytes => FromBytes for mem::MaybeUninit<T>);
unsafe_impl!(T: Unaligned => Unaligned for mem::MaybeUninit<T>);
assert_unaligned!(mem::MaybeUninit<()>, mem::MaybeUninit<u8>);
}
safety_comment! {
/// SAFETY:
Expand Down Expand Up @@ -3716,8 +3882,11 @@ mod tests {
assert_impls!(Option<NonZeroUsize>: FromZeroes, FromBytes, AsBytes, !Unaligned);
assert_impls!(Option<NonZeroIsize>: FromZeroes, FromBytes, AsBytes, !Unaligned);

// Implements none of the ZC traits.
// Implements none of the ZC traits, but implements `KnownLayout` so
// that types like `MaybeUninit<NotZerocopy>` can at least be written
// down without causing errors, which lets us test them.
struct NotZerocopy;
impl_known_layout!(NotZerocopy);

assert_impls!(PhantomData<NotZerocopy>: FromZeroes, FromBytes, AsBytes, Unaligned);
assert_impls!(PhantomData<[u8]>: FromZeroes, FromBytes, AsBytes, Unaligned);
Expand All @@ -3727,8 +3896,15 @@ mod tests {
assert_impls!(ManuallyDrop<NotZerocopy>: !FromZeroes, !FromBytes, !AsBytes, !Unaligned);
assert_impls!(ManuallyDrop<[NotZerocopy]>: !FromZeroes, !FromBytes, !AsBytes, !Unaligned);

assert_impls!(mem::MaybeUninit<u8>: FromZeroes, FromBytes, Unaligned, !AsBytes);
assert_impls!(mem::MaybeUninit<NotZerocopy>: !FromZeroes, !FromBytes, !AsBytes, !Unaligned);

assert_impls!(MaybeUninit<u8>: FromZeroes, FromBytes, Unaligned, !AsBytes);
assert_impls!(MaybeUninit<MaybeUninit<u8>>: FromZeroes, FromBytes, Unaligned, !AsBytes);
assert_impls!(MaybeUninit<[u8]>: FromZeroes, FromBytes, Unaligned, !AsBytes);
assert_impls!(MaybeUninit<MaybeUninit<[u8]>>: FromZeroes, FromBytes, Unaligned, !AsBytes);
assert_impls!(MaybeUninit<NotZerocopy>: !FromZeroes, !FromBytes, !AsBytes, !Unaligned);
assert_impls!(MaybeUninit<MaybeUninit<NotZerocopy>>: !FromZeroes, !FromBytes, !AsBytes, !Unaligned);

assert_impls!(Wrapping<u8>: FromZeroes, FromBytes, AsBytes, Unaligned);
assert_impls!(Wrapping<NotZerocopy>: !FromZeroes, !FromBytes, !AsBytes, !Unaligned);
Expand Down
42 changes: 38 additions & 4 deletions src/macros.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
/// The macro invocations are emitted, each decorated with the following
/// attribute: `#[allow(clippy::undocumented_unsafe_blocks)]`.
macro_rules! safety_comment {
(#[doc = r" SAFETY:"] $(#[doc = $_doc:literal])* $($macro:ident!$args:tt;)*) => {
(#[doc = r" SAFETY:"] $(#[doc = $_doc:literal])* $($macro:ident!$args:tt; $(#[doc = r" SAFETY:"] $(#[doc = $__doc:literal])*)?)*) => {
#[allow(clippy::undocumented_unsafe_blocks)]
const _: () = { $($macro!$args;)* };
}
Expand Down Expand Up @@ -199,6 +199,7 @@ macro_rules! impl_or_verify {
/// - Fixed prefix size
/// - Alignment
/// - (For DSTs) trailing slice element size
/// - `UnsafeCell`s covering exactly the same byte ranges
/// - It must be valid to perform an `as` cast from `*mut $repr` to `*mut $ty`,
/// and this operation must preserve referent size (ie, `size_of_val_raw`).
macro_rules! unsafe_impl_known_layout {
Expand All @@ -211,14 +212,47 @@ macro_rules! unsafe_impl_known_layout {
const ALIGN: NonZeroUsize = <$repr as KnownLayout>::ALIGN;
const TRAILING_SLICE_ELEM_SIZE: Option<usize> = <$repr as KnownLayout>::TRAILING_SLICE_ELEM_SIZE;

// SAFETY:
// - By `MaybeUninit` invariant, it is sound to write any byte -
// including an uninitialized byte - at any byte offset in
// `$repr::MaybeUninit`.
// - Caller has promised that `$ty` and `$repr` have the same
// alignment, size, trailing element size, and `UnsafeCell`
// locations. Also, by `MaybeUninit` invariants:
// - `$repr` and `$repr::MaybeUninit` have the same alignment.
// - It is valid to cast `*const $repr` to `*const
// $repr::MaybeUninit` and vice-versa (and likewise for `*mut`),
// and these operations preserve pointer referent size.
// - `$repr` and `$repr::MaybeUninit` contain `UnsafeCell`s at
// exactly the same byte ranges.
//
// Thus, all of the same properties hold between `$ty` and
// `$repr::MaybeUninit`.
type MaybeUninit = <$repr as KnownLayout>::MaybeUninit;

// SAFETY: All operations preserve address and provenance. Caller
// has promised that the `as` cast preserves size.
#[inline(always)]
fn raw_from_ptr_len(bytes: NonNull<u8>, elems: usize) -> NonNull<Self> {
Self::cast_from_maybe_uninit(Self::MaybeUninit::raw_from_ptr_len(bytes, elems))
}

// SAFETY: All operations preserve pointer address and provenance.
fn cast_from_maybe_uninit(maybe_uninit: NonNull<Self::MaybeUninit>) -> NonNull<Self> {
let repr = <$repr as KnownLayout>::cast_from_maybe_uninit(maybe_uninit).as_ptr();
#[allow(clippy::as_conversions)]
let slf = repr as *mut Self;
// SAFETY: `.as_ptr()` called on non-null pointer.
unsafe { NonNull::new_unchecked(slf) }
}

// SAFETY: All operations preserve pointer address and provenance.
fn cast_to_maybe_uninit(slf: NonNull<Self>) -> NonNull<Self::MaybeUninit> {
#[allow(clippy::as_conversions)]
let ptr = <$repr>::raw_from_ptr_len(bytes, elems).as_ptr() as *mut Self;
// SAFETY: `ptr` was converted from `bytes`, which is non-null.
unsafe { NonNull::new_unchecked(ptr) }
let repr = slf.as_ptr() as *mut $repr;
// SAFETY: `.as_ptr()` called on non-null pointer.
let repr = unsafe { NonNull::new_unchecked(repr) };
<$repr as KnownLayout>::cast_to_maybe_uninit(repr)
}
}
};
Expand Down
Loading

0 comments on commit 7ddaeb4

Please sign in to comment.