Skip to content

Commit

Permalink
alternative to vaddv
Browse files (browse the repository at this point in the history)
  • Loading branch information
karthikbhargavan committed May 22, 2024
1 parent 08e0d1b commit 94121bc
Showing 1 changed file with 11 additions and 3 deletions.
14 changes: 11 additions & 3 deletions polynomials-aarch64/src/rejsample.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#![forbid(unsafe_code)]
//#![forbid(unsafe_code)]

use crate::neon::*;

Expand Down / Expand Up
@@ -776,8 +776,16 @@ pub(crate) fn rej_sample(a: &[u8], out: &mut [i16]) -> usize {
let input = super::simd128ops::deserialize_12(a);
let mask0 = _vcleq_s16(input.low, fm);
let mask1 = _vcleq_s16(input.high, fm);
let used0 = _vaddvq_u16(_vandq_u16(mask0, bits));
let used1 = _vaddvq_u16(_vandq_u16(mask1, bits));
// let used0 = _vaddvq_u16(_vandq_u16(mask0, bits));
// let used1 = _vaddvq_u16(_vandq_u16(mask1, bits));

let mut u0 = [0u16;8];
let mut u1 = [0u16;8];
unsafe{vst1q_u16(u0.as_mut_ptr() as *mut u16, _vandq_u16(mask0, bits));}
unsafe{vst1q_u16(u1.as_mut_ptr() as *mut u16, _vandq_u16(mask1, bits));}
let used0 = u0[0] + u0[1] + u0[2] + u0[3] + u0[4] + u0[5] + u0[6] + u0[7];
let used1 = u1[0] + u1[1] + u1[2] + u1[3] + u1[4] + u1[5] + u1[6] + u1[7];

let pick0 = used0.count_ones();
let pick1 = used1.count_ones();

Expand Down

0 comments on commit 94121bc

Please sign in to comment.