diff --git a/crates/core_simd/src/lib.rs b/crates/core_simd/src/lib.rs index e054d483ca5..31e7a3617bc 100644 --- a/crates/core_simd/src/lib.rs +++ b/crates/core_simd/src/lib.rs @@ -2,6 +2,8 @@ #![feature( const_ptr_read, const_refs_to_cell, + const_maybe_uninit_as_mut_ptr, + const_mut_refs, convert_float_to_int, decl_macro, intra_doc_pointers, diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index 106f1965959..92984f55e45 100644 --- a/crates/core_simd/src/vector.rs +++ b/crates/core_simd/src/vector.rs @@ -176,34 +176,70 @@ where unsafe { &mut *(self as *mut Self as *mut [T; N]) } } + /// Load a vector from an array of `T`. + /// + /// This function is necessary since `repr(simd)` has padding for non-power-of-2 vectors (at the time of writing). + /// With padding, `read_unaligned` will read past the end of an array of N elements. + /// + /// # Safety + /// Reading `ptr` must be safe, as if by `<*const [T; N]>::read_unaligned`. + const unsafe fn load(ptr: *const [T; N]) -> Self { + // There are potentially simpler ways to write this function, but this should result in + // LLVM `load ` + + let mut tmp = core::mem::MaybeUninit::::uninit(); + // SAFETY: `Simd` always contains `N` elements of type `T`. It may have padding + // which does not need to be initialized. The safety of reading `ptr` is ensured by the + // caller. + unsafe { + core::ptr::copy_nonoverlapping(ptr, tmp.as_mut_ptr().cast(), 1); + tmp.assume_init() + } + } + + /// Store a vector to an array of `T`. + /// + /// See `load` as to why this function is necessary. + /// + /// # Safety + /// Writing to `ptr` must be safe, as if by `<*mut [T; N]>::write_unaligned`. + const unsafe fn store(self, ptr: *mut [T; N]) { + // There are potentially simpler ways to write this function, but this should result in + // LLVM `store ` + + // Creating a temporary helps LLVM turn the memcpy into a store. + let tmp = self; + // SAFETY: `Simd` always contains `N` elements of type `T`. The safety of writing + // `ptr` is ensured by the caller. + unsafe { core::ptr::copy_nonoverlapping(tmp.as_array(), ptr, 1) } + } + /// Converts an array to a SIMD vector. pub const fn from_array(array: [T; N]) -> Self { - // SAFETY: Transmuting between `Simd` and `[T; N]` - // is always valid. We need to use `read_unaligned` here, since - // the array may have a lower alignment than the vector. + // SAFETY: `&array` is safe to read. // - // FIXME: We currently use a pointer read instead of `transmute_copy` because - // it results in better codegen with optimizations disabled, but we should - // probably just use `transmute` once that works on const generic types. + // FIXME: We currently use a pointer load instead of `transmute_copy` because `repr(simd)` + // results in padding for non-power-of-2 vectors (so vectors are larger than arrays). // // NOTE: This deliberately doesn't just use `Self(array)`, see the comment // on the struct definition for details. - unsafe { (&array as *const [T; N] as *const Self).read_unaligned() } + unsafe { Self::load(&array) } } /// Converts a SIMD vector to an array. pub const fn to_array(self) -> [T; N] { - // SAFETY: Transmuting between `Simd` and `[T; N]` - // is always valid. No need to use `read_unaligned` here, since - // the vector never has a lower alignment than the array. + let mut tmp = core::mem::MaybeUninit::uninit(); + // SAFETY: writing to `tmp` is safe and initializes it. // - // FIXME: We currently use a pointer read instead of `transmute_copy` because - // it results in better codegen with optimizations disabled, but we should - // probably just use `transmute` once that works on const generic types. + // FIXME: We currently use a pointer store instead of `transmute_copy` because `repr(simd)` + // results in padding for non-power-of-2 vectors (so vectors are larger than arrays). // // NOTE: This deliberately doesn't just use `self.0`, see the comment // on the struct definition for details. - unsafe { (&self as *const Self as *const [T; N]).read() } + unsafe { + self.store(tmp.as_mut_ptr()); + tmp.assume_init() + } } /// Converts a slice to a SIMD vector containing `slice[..N]`.