diff --git a/src/ctx.rs b/src/ctx.rs index be22f7e27..12473fc1c 100644 --- a/src/ctx.rs +++ b/src/ctx.rs @@ -99,6 +99,32 @@ impl CaseSetter(&mut *buf, val); } + + /// # Safety + /// + /// `buf` must be correctly aligned and dereferenceable (but need not be + /// initialized). + #[inline] + pub unsafe fn set_raw<T: Copy>(&self, buf: *mut [T], val: T) { + assert!(buf.len() >= self.offset + self.len); + let buf = (buf as *mut T).add(self.offset); + match self.len { + 01 if UP_TO >= 01 => unsafe { *(buf as *mut [T; 01]) = [val; 01] }, + 02 if UP_TO >= 02 => unsafe { *(buf as *mut [T; 02]) = [val; 02] }, + 04 if UP_TO >= 04 => unsafe { *(buf as *mut [T; 04]) = [val; 04] }, + 08 if UP_TO >= 08 => unsafe { *(buf as *mut [T; 08]) = [val; 08] }, + 16 if UP_TO >= 16 => unsafe { *(buf as *mut [T; 16]) = [val; 16] }, + 32 if UP_TO >= 32 => unsafe { *(buf as *mut [T; 32]) = [val; 32] }, + 64 if UP_TO >= 64 => unsafe { *(buf as *mut [T; 64]) = [val; 64] }, + _ => { + if WITH_DEFAULT { + for i in 0..self.len { + unsafe { buf.add(i).write(val) }; + } + } + } + } + } } /// The entrypoint to the [`CaseSet`] API. diff --git a/src/lf_mask.rs b/src/lf_mask.rs index 8ec7ea3a4..b99729019 100644 --- a/src/lf_mask.rs +++ b/src/lf_mask.rs @@ -18,6 +18,7 @@ use libc::ptrdiff_t; use parking_lot::RwLock; use std::cmp; use std::ffi::c_int; +use std::mem::MaybeUninit; #[repr(C)] pub struct Av1FilterLUT { @@ -93,7 +94,7 @@ pub struct Av1Restoration { /// Instead of offsetting `txa`, the offsets are calculated from /// the existing `y_off` and `x_off` args and applied at each use site of `txa`. 
fn decomp_tx( - txa: &mut [[[[u8; 32]; 32]; 2]; 2], + txa: &mut MaybeUninit<Align16<[[[[u8; 32]; 32]; 2]; 2]>>, from: TxfmSize, depth: usize, y_off: u8, @@ -128,15 +129,37 @@ fn decomp_tx( let lw = cmp::min(2, t_dim.lw); let lh = cmp::min(2, t_dim.lh); + let txa = txa.as_mut_ptr() as *mut [[[[u8; 32]; 32]; 2]; 2]; CaseSet::<16, false>::one((), t_dim.w as usize, x0, |case, ()| { for y in 0..t_dim.h as usize { - case.set(&mut txa[0][0][y0 + y], lw); - case.set(&mut txa[1][0][y0 + y], lh); - txa[0][1][y0 + y][x0] = t_dim.w; + unsafe { + case.set_raw( + (((txa as *mut [[[u8; 32]; 32]; 2]).add(0) as *mut [[u8; 32]; 32]).add(0) + as *mut [u8; 32]) + .add(y0 + y), + lw, + ); + case.set_raw( + (((txa as *mut [[[u8; 32]; 32]; 2]).add(1) as *mut [[u8; 32]; 32]).add(0) + as *mut [u8; 32]) + .add(y0 + y), + lh, + ); + ((((txa as *mut [[[u8; 32]; 32]; 2]).add(0) as *mut [[u8; 32]; 32]).add(1) + as *mut [u8; 32]) + .add(y0 + y) as *mut u8) + .add(x0) + .write(t_dim.w); + } } }); - CaseSet::<16, false>::one((), t_dim.w as usize, x0, |case, ()| { - case.set(&mut txa[1][1][y0], t_dim.h); + CaseSet::<16, false>::one((), t_dim.w as usize, x0, |case, ()| unsafe { + case.set_raw( + (((txa as *mut [[[u8; 32]; 32]; 2]).add(1) as *mut [[u8; 32]; 32]).add(1) + as *mut [u8; 32]) + .add(y0), + t_dim.h, + ); }); }; } @@ -157,14 +180,17 @@ fn mask_edges_inter( let t_dim = &dav1d_txfm_dimensions[max_tx as usize]; // See [`decomp_tx`]'s docs for the `txa` arg. - let mut txa = Align16([[[[0; 32]; 32]; 2]; 2]); + let mut txa: MaybeUninit<Align16<[[[[u8; 32]; 32]; 2]; 2]>> = MaybeUninit::uninit(); for (y_off, _) in (0..h4).step_by(t_dim.h as usize).enumerate() { for (x_off, _) in (0..w4).step_by(t_dim.w as usize).enumerate() { - decomp_tx(&mut txa.0, max_tx, 0, y_off as u8, x_off as u8, tx_masks); + decomp_tx(&mut txa, max_tx, 0, y_off as u8, x_off as u8, tx_masks); } } + // SAFETY: Calls to `decomp_tx` above fully initialize the `txa` array + let txa = unsafe { txa.assume_init() }; + // left block edge for y in 0..h4 { let mask = 1u32 << (by4 + y);