Skip to content

Commit

Permalink
fix type confusion
Browse files Browse the repository at this point in the history
  • Loading branch information
cothan authored and ducnguyen-sb committed Sep 12, 2023
1 parent 44bc9c7 commit 7b06f61
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 56 deletions.
56 changes: 28 additions & 28 deletions src/sig/falcon/pqclean_falcon-1024_aarch64/poly_int.c
Original file line number Diff line number Diff line change
Expand Up @@ -201,10 +201,10 @@ void PQCLEAN_FALCON1024_AARCH64_poly_convert_to_unsigned(int16_t f[FALCON_N]) {
// Total SIMD registers: 26 = 8 + 16 + 2
uint16x8x4_t b0, b1; // 8
int16x8x4_t a0, a1, c0, c1; // 16
-uint16x8_t neon_q, neon_2q; // 2
+int16x8_t neon_q, neon_2q; // 2

-neon_q = vdupq_n_u16(FALCON_Q);
-neon_2q = vdupq_n_u16(FALCON_Q << 1);
+neon_q = vdupq_n_s16(FALCON_Q);
+neon_2q = vdupq_n_s16(FALCON_Q << 1);

for (int i = 0; i < FALCON_N; i += 64) {
vload_s16_x4(a0, &f[i]);
Expand Down Expand Up @@ -248,15 +248,15 @@ void PQCLEAN_FALCON1024_AARCH64_poly_convert_to_unsigned(int16_t f[FALCON_N]) {

// Conditional subtraction with FALCON_Q

-c0.val[0] = vandq_s16(b0.val[0], neon_q);
-c0.val[1] = vandq_s16(b0.val[1], neon_q);
-c0.val[2] = vandq_s16(b0.val[2], neon_q);
-c0.val[3] = vandq_s16(b0.val[3], neon_q);
+c0.val[0] = vandq_s16(vreinterpretq_s16_u16(b0.val[0]), neon_q);
+c0.val[1] = vandq_s16(vreinterpretq_s16_u16(b0.val[1]), neon_q);
+c0.val[2] = vandq_s16(vreinterpretq_s16_u16(b0.val[2]), neon_q);
+c0.val[3] = vandq_s16(vreinterpretq_s16_u16(b0.val[3]), neon_q);

-c1.val[0] = vandq_s16(b1.val[0], neon_q);
-c1.val[1] = vandq_s16(b1.val[1], neon_q);
-c1.val[2] = vandq_s16(b1.val[2], neon_q);
-c1.val[3] = vandq_s16(b1.val[3], neon_q);
+c1.val[0] = vandq_s16(vreinterpretq_s16_u16(b1.val[0]), neon_q);
+c1.val[1] = vandq_s16(vreinterpretq_s16_u16(b1.val[1]), neon_q);
+c1.val[2] = vandq_s16(vreinterpretq_s16_u16(b1.val[2]), neon_q);
+c1.val[3] = vandq_s16(vreinterpretq_s16_u16(b1.val[3]), neon_q);

vsub_x4(a0, a0, c0);
vsub_x4(a1, a1, c1);
Expand Down Expand Up @@ -301,15 +301,15 @@ int PQCLEAN_FALCON1024_AARCH64_poly_int16_to_int8(int8_t G[FALCON_N], const int1
c1.val[3] = vcgeq_s16(f.val[3], neon_q_2);

// Perform subtraction with Q
-c0.val[0] = vandq_s16(c0.val[0], neon_q);
-c0.val[1] = vandq_s16(c0.val[1], neon_q);
-c0.val[2] = vandq_s16(c0.val[2], neon_q);
-c0.val[3] = vandq_s16(c0.val[3], neon_q);
+c0.val[0] = vandq_s16(vreinterpretq_s16_u16(c0.val[0]), neon_q);
+c0.val[1] = vandq_s16(vreinterpretq_s16_u16(c0.val[1]), neon_q);
+c0.val[2] = vandq_s16(vreinterpretq_s16_u16(c0.val[2]), neon_q);
+c0.val[3] = vandq_s16(vreinterpretq_s16_u16(c0.val[3]), neon_q);

-c1.val[0] = vandq_s16(c1.val[0], neon_q);
-c1.val[1] = vandq_s16(c1.val[1], neon_q);
-c1.val[2] = vandq_s16(c1.val[2], neon_q);
-c1.val[3] = vandq_s16(c1.val[3], neon_q);
+c1.val[0] = vandq_s16(vreinterpretq_s16_u16(c1.val[0]), neon_q);
+c1.val[1] = vandq_s16(vreinterpretq_s16_u16(c1.val[1]), neon_q);
+c1.val[2] = vandq_s16(vreinterpretq_s16_u16(c1.val[2]), neon_q);
+c1.val[3] = vandq_s16(vreinterpretq_s16_u16(c1.val[3]), neon_q);

vsub_x4(a, a, c0);
vsub_x4(f, f, c1);
Expand All @@ -326,15 +326,15 @@ int PQCLEAN_FALCON1024_AARCH64_poly_int16_to_int8(int8_t G[FALCON_N], const int1
d1.val[3] = vcgtq_s16(neon__q_2, f.val[3]);

// Perform addition with Q
-d0.val[0] = vandq_s16(d0.val[0], neon_q);
-d0.val[1] = vandq_s16(d0.val[1], neon_q);
-d0.val[2] = vandq_s16(d0.val[2], neon_q);
-d0.val[3] = vandq_s16(d0.val[3], neon_q);
-
-d1.val[0] = vandq_s16(d1.val[0], neon_q);
-d1.val[1] = vandq_s16(d1.val[1], neon_q);
-d1.val[2] = vandq_s16(d1.val[2], neon_q);
-d1.val[3] = vandq_s16(d1.val[3], neon_q);
+d0.val[0] = vandq_s16(vreinterpretq_s16_u16(d0.val[0]), neon_q);
+d0.val[1] = vandq_s16(vreinterpretq_s16_u16(d0.val[1]), neon_q);
+d0.val[2] = vandq_s16(vreinterpretq_s16_u16(d0.val[2]), neon_q);
+d0.val[3] = vandq_s16(vreinterpretq_s16_u16(d0.val[3]), neon_q);
+
+d1.val[0] = vandq_s16(vreinterpretq_s16_u16(d1.val[0]), neon_q);
+d1.val[1] = vandq_s16(vreinterpretq_s16_u16(d1.val[1]), neon_q);
+d1.val[2] = vandq_s16(vreinterpretq_s16_u16(d1.val[2]), neon_q);
+d1.val[3] = vandq_s16(vreinterpretq_s16_u16(d1.val[3]), neon_q);

vadd_x4(a, a, d0);
vadd_x4(f, f, d1);
Expand Down
56 changes: 28 additions & 28 deletions src/sig/falcon/pqclean_falcon-512_aarch64/poly_int.c
Original file line number Diff line number Diff line change
Expand Up @@ -201,10 +201,10 @@ void PQCLEAN_FALCON512_AARCH64_poly_convert_to_unsigned(int16_t f[FALCON_N]) {
// Total SIMD registers: 26 = 8 + 16 + 2
uint16x8x4_t b0, b1; // 8
int16x8x4_t a0, a1, c0, c1; // 16
-uint16x8_t neon_q, neon_2q; // 2
+int16x8_t neon_q, neon_2q; // 2

-neon_q = vdupq_n_u16(FALCON_Q);
-neon_2q = vdupq_n_u16(FALCON_Q << 1);
+neon_q = vdupq_n_s16(FALCON_Q);
+neon_2q = vdupq_n_s16(FALCON_Q << 1);

for (int i = 0; i < FALCON_N; i += 64) {
vload_s16_x4(a0, &f[i]);
Expand Down Expand Up @@ -248,15 +248,15 @@ void PQCLEAN_FALCON512_AARCH64_poly_convert_to_unsigned(int16_t f[FALCON_N]) {

// Conditional subtraction with FALCON_Q

-c0.val[0] = vandq_s16(b0.val[0], neon_q);
-c0.val[1] = vandq_s16(b0.val[1], neon_q);
-c0.val[2] = vandq_s16(b0.val[2], neon_q);
-c0.val[3] = vandq_s16(b0.val[3], neon_q);
+c0.val[0] = vandq_s16(vreinterpretq_s16_u16(b0.val[0]), neon_q);
+c0.val[1] = vandq_s16(vreinterpretq_s16_u16(b0.val[1]), neon_q);
+c0.val[2] = vandq_s16(vreinterpretq_s16_u16(b0.val[2]), neon_q);
+c0.val[3] = vandq_s16(vreinterpretq_s16_u16(b0.val[3]), neon_q);

-c1.val[0] = vandq_s16(b1.val[0], neon_q);
-c1.val[1] = vandq_s16(b1.val[1], neon_q);
-c1.val[2] = vandq_s16(b1.val[2], neon_q);
-c1.val[3] = vandq_s16(b1.val[3], neon_q);
+c1.val[0] = vandq_s16(vreinterpretq_s16_u16(b1.val[0]), neon_q);
+c1.val[1] = vandq_s16(vreinterpretq_s16_u16(b1.val[1]), neon_q);
+c1.val[2] = vandq_s16(vreinterpretq_s16_u16(b1.val[2]), neon_q);
+c1.val[3] = vandq_s16(vreinterpretq_s16_u16(b1.val[3]), neon_q);

vsub_x4(a0, a0, c0);
vsub_x4(a1, a1, c1);
Expand Down Expand Up @@ -301,15 +301,15 @@ int PQCLEAN_FALCON512_AARCH64_poly_int16_to_int8(int8_t G[FALCON_N], const int16
c1.val[3] = vcgeq_s16(f.val[3], neon_q_2);

// Perform subtraction with Q
-c0.val[0] = vandq_s16(c0.val[0], neon_q);
-c0.val[1] = vandq_s16(c0.val[1], neon_q);
-c0.val[2] = vandq_s16(c0.val[2], neon_q);
-c0.val[3] = vandq_s16(c0.val[3], neon_q);
+c0.val[0] = vandq_s16(vreinterpretq_s16_u16(c0.val[0]), neon_q);
+c0.val[1] = vandq_s16(vreinterpretq_s16_u16(c0.val[1]), neon_q);
+c0.val[2] = vandq_s16(vreinterpretq_s16_u16(c0.val[2]), neon_q);
+c0.val[3] = vandq_s16(vreinterpretq_s16_u16(c0.val[3]), neon_q);

-c1.val[0] = vandq_s16(c1.val[0], neon_q);
-c1.val[1] = vandq_s16(c1.val[1], neon_q);
-c1.val[2] = vandq_s16(c1.val[2], neon_q);
-c1.val[3] = vandq_s16(c1.val[3], neon_q);
+c1.val[0] = vandq_s16(vreinterpretq_s16_u16(c1.val[0]), neon_q);
+c1.val[1] = vandq_s16(vreinterpretq_s16_u16(c1.val[1]), neon_q);
+c1.val[2] = vandq_s16(vreinterpretq_s16_u16(c1.val[2]), neon_q);
+c1.val[3] = vandq_s16(vreinterpretq_s16_u16(c1.val[3]), neon_q);

vsub_x4(a, a, c0);
vsub_x4(f, f, c1);
Expand All @@ -326,15 +326,15 @@ int PQCLEAN_FALCON512_AARCH64_poly_int16_to_int8(int8_t G[FALCON_N], const int16
d1.val[3] = vcgtq_s16(neon__q_2, f.val[3]);

// Perform addition with Q
-d0.val[0] = vandq_s16(d0.val[0], neon_q);
-d0.val[1] = vandq_s16(d0.val[1], neon_q);
-d0.val[2] = vandq_s16(d0.val[2], neon_q);
-d0.val[3] = vandq_s16(d0.val[3], neon_q);
-
-d1.val[0] = vandq_s16(d1.val[0], neon_q);
-d1.val[1] = vandq_s16(d1.val[1], neon_q);
-d1.val[2] = vandq_s16(d1.val[2], neon_q);
-d1.val[3] = vandq_s16(d1.val[3], neon_q);
+d0.val[0] = vandq_s16(vreinterpretq_s16_u16(d0.val[0]), neon_q);
+d0.val[1] = vandq_s16(vreinterpretq_s16_u16(d0.val[1]), neon_q);
+d0.val[2] = vandq_s16(vreinterpretq_s16_u16(d0.val[2]), neon_q);
+d0.val[3] = vandq_s16(vreinterpretq_s16_u16(d0.val[3]), neon_q);
+
+d1.val[0] = vandq_s16(vreinterpretq_s16_u16(d1.val[0]), neon_q);
+d1.val[1] = vandq_s16(vreinterpretq_s16_u16(d1.val[1]), neon_q);
+d1.val[2] = vandq_s16(vreinterpretq_s16_u16(d1.val[2]), neon_q);
+d1.val[3] = vandq_s16(vreinterpretq_s16_u16(d1.val[3]), neon_q);

vadd_x4(a, a, d0);
vadd_x4(f, f, d1);
Expand Down

0 comments on commit 7b06f61

Please sign in to comment.