diff --git a/docs/algorithms/kem/classic_mceliece.md b/docs/algorithms/kem/classic_mceliece.md
index b193065b5a..29c2d745e3 100644
--- a/docs/algorithms/kem/classic_mceliece.md
+++ b/docs/algorithms/kem/classic_mceliece.md
@@ -14,7 +14,7 @@
## Advisories
- Classic-McEliece-460896, Classic-McEliece-460896f, Classic-McEliece-6960119, and Classic-McEliece-6960119f parameter sets fail memory leak testing on x86-64 when building with ``clang`` using optimization level ``-O2`` and ``-O3``. Care is advised when using the algorithm at higher optimization levels, and any other compiler and architecture.
-- Current implementation of the algorithm may not be constant-time. Additionally, environment specific constant-time leaks may not be documented; please report potential constant-time leaks when found.
+- Current implementation of the algorithm may not be constant-time. Additionally, environment specific constant-time leaks may not be documented; please report potential constant-time leaks when found.
## Parameter set summary
@@ -35,8 +35,8 @@
| Implementation source | Identifier in upstream | Supported architecture(s) | Supported operating system(s) | CPU extension(s) used | No branching-on-secrets claimed? | No branching-on-secrets checked by valgrind? | Large stack usage?‡ |
|:---------------------------------:|:-------------------------|:----------------------------|:--------------------------------|:------------------------|:-----------------------------------|:-----------------------------------------------|:----------------------|
-| [Primary Source](#primary-source) | clean | All | All | None | True | True | True |
-| [Primary Source](#primary-source) | avx2 | x86\_64 | Linux,Darwin | AVX2,POPCNT | False | True | True |
+| [Primary Source](#primary-source) | clean | All | All | None | False | False | True |
+| [Primary Source](#primary-source) | avx2 | x86\_64 | Linux,Darwin | AVX2,POPCNT | False | False | True |
Are implementations chosen based on runtime CPU feature detection? **Yes**.
@@ -46,8 +46,8 @@ Are implementations chosen based on runtime CPU feature detection? **Yes**.
| Implementation source | Identifier in upstream | Supported architecture(s) | Supported operating system(s) | CPU extension(s) used | No branching-on-secrets claimed? | No branching-on-secrets checked by valgrind? | Large stack usage? |
|:---------------------------------:|:-------------------------|:----------------------------|:--------------------------------|:------------------------|:-----------------------------------|:-----------------------------------------------|:---------------------|
-| [Primary Source](#primary-source) | clean | All | All | None | True | True | True |
-| [Primary Source](#primary-source) | avx2 | x86\_64 | Linux,Darwin | AVX2,POPCNT,BMI1 | False | True | True |
+| [Primary Source](#primary-source) | clean | All | All | None | False | False | True |
+| [Primary Source](#primary-source) | avx2 | x86\_64 | Linux,Darwin | AVX2,POPCNT,BMI1 | False | False | True |
Are implementations chosen based on runtime CPU feature detection? **Yes**.
@@ -55,8 +55,8 @@ Are implementations chosen based on runtime CPU feature detection? **Yes**.
| Implementation source | Identifier in upstream | Supported architecture(s) | Supported operating system(s) | CPU extension(s) used | No branching-on-secrets claimed? | No branching-on-secrets checked by valgrind? | Large stack usage? |
|:---------------------------------:|:-------------------------|:----------------------------|:--------------------------------|:------------------------|:-----------------------------------|:-----------------------------------------------|:---------------------|
-| [Primary Source](#primary-source) | clean | All | All | None | True | True | True |
-| [Primary Source](#primary-source) | avx2 | x86\_64 | Linux,Darwin | AVX2,POPCNT | False | True | True |
+| [Primary Source](#primary-source) | clean | All | All | None | False | False | True |
+| [Primary Source](#primary-source) | avx2 | x86\_64 | Linux,Darwin | AVX2,POPCNT | False | False | True |
Are implementations chosen based on runtime CPU feature detection? **Yes**.
@@ -64,8 +64,8 @@ Are implementations chosen based on runtime CPU feature detection? **Yes**.
| Implementation source | Identifier in upstream | Supported architecture(s) | Supported operating system(s) | CPU extension(s) used | No branching-on-secrets claimed? | No branching-on-secrets checked by valgrind? | Large stack usage? |
|:---------------------------------:|:-------------------------|:----------------------------|:--------------------------------|:------------------------|:-----------------------------------|:-----------------------------------------------|:---------------------|
-| [Primary Source](#primary-source) | clean | All | All | None | True | True | True |
-| [Primary Source](#primary-source) | avx2 | x86\_64 | Linux,Darwin | AVX2,POPCNT,BMI1 | False | True | True |
+| [Primary Source](#primary-source) | clean | All | All | None | False | False | True |
+| [Primary Source](#primary-source) | avx2 | x86\_64 | Linux,Darwin | AVX2,POPCNT,BMI1 | False | False | True |
Are implementations chosen based on runtime CPU feature detection? **Yes**.
@@ -73,8 +73,8 @@ Are implementations chosen based on runtime CPU feature detection? **Yes**.
| Implementation source | Identifier in upstream | Supported architecture(s) | Supported operating system(s) | CPU extension(s) used | No branching-on-secrets claimed? | No branching-on-secrets checked by valgrind? | Large stack usage? |
|:---------------------------------:|:-------------------------|:----------------------------|:--------------------------------|:------------------------|:-----------------------------------|:-----------------------------------------------|:---------------------|
-| [Primary Source](#primary-source) | clean | All | All | None | True | True | True |
-| [Primary Source](#primary-source) | avx2 | x86\_64 | Linux,Darwin | AVX2,POPCNT | False | True | True |
+| [Primary Source](#primary-source) | clean | All | All | None | False | False | True |
+| [Primary Source](#primary-source) | avx2 | x86\_64 | Linux,Darwin | AVX2,POPCNT | False | False | True |
Are implementations chosen based on runtime CPU feature detection? **Yes**.
@@ -82,8 +82,8 @@ Are implementations chosen based on runtime CPU feature detection? **Yes**.
| Implementation source | Identifier in upstream | Supported architecture(s) | Supported operating system(s) | CPU extension(s) used | No branching-on-secrets claimed? | No branching-on-secrets checked by valgrind? | Large stack usage? |
|:---------------------------------:|:-------------------------|:----------------------------|:--------------------------------|:------------------------|:-----------------------------------|:-----------------------------------------------|:---------------------|
-| [Primary Source](#primary-source) | clean | All | All | None | True | True | True |
-| [Primary Source](#primary-source) | avx2 | x86\_64 | Linux,Darwin | AVX2,POPCNT,BMI1 | False | True | True |
+| [Primary Source](#primary-source) | clean | All | All | None | False | False | True |
+| [Primary Source](#primary-source) | avx2 | x86\_64 | Linux,Darwin | AVX2,POPCNT,BMI1 | False | False | True |
Are implementations chosen based on runtime CPU feature detection? **Yes**.
@@ -91,8 +91,8 @@ Are implementations chosen based on runtime CPU feature detection? **Yes**.
| Implementation source | Identifier in upstream | Supported architecture(s) | Supported operating system(s) | CPU extension(s) used | No branching-on-secrets claimed? | No branching-on-secrets checked by valgrind? | Large stack usage? |
|:---------------------------------:|:-------------------------|:----------------------------|:--------------------------------|:------------------------|:-----------------------------------|:-----------------------------------------------|:---------------------|
-| [Primary Source](#primary-source) | clean | All | All | None | True | True | True |
-| [Primary Source](#primary-source) | avx2 | x86\_64 | Linux,Darwin | AVX2,POPCNT | False | True | True |
+| [Primary Source](#primary-source) | clean | All | All | None | False | False | True |
+| [Primary Source](#primary-source) | avx2 | x86\_64 | Linux,Darwin | AVX2,POPCNT | False | False | True |
Are implementations chosen based on runtime CPU feature detection? **Yes**.
@@ -100,8 +100,8 @@ Are implementations chosen based on runtime CPU feature detection? **Yes**.
| Implementation source | Identifier in upstream | Supported architecture(s) | Supported operating system(s) | CPU extension(s) used | No branching-on-secrets claimed? | No branching-on-secrets checked by valgrind? | Large stack usage? |
|:---------------------------------:|:-------------------------|:----------------------------|:--------------------------------|:------------------------|:-----------------------------------|:-----------------------------------------------|:---------------------|
-| [Primary Source](#primary-source) | clean | All | All | None | True | True | True |
-| [Primary Source](#primary-source) | avx2 | x86\_64 | Linux,Darwin | AVX2,POPCNT,BMI1 | False | True | True |
+| [Primary Source](#primary-source) | clean | All | All | None | False | False | True |
+| [Primary Source](#primary-source) | avx2 | x86\_64 | Linux,Darwin | AVX2,POPCNT,BMI1 | False | False | True |
Are implementations chosen based on runtime CPU feature detection? **Yes**.
@@ -109,8 +109,8 @@ Are implementations chosen based on runtime CPU feature detection? **Yes**.
| Implementation source | Identifier in upstream | Supported architecture(s) | Supported operating system(s) | CPU extension(s) used | No branching-on-secrets claimed? | No branching-on-secrets checked by valgrind? | Large stack usage? |
|:---------------------------------:|:-------------------------|:----------------------------|:--------------------------------|:------------------------|:-----------------------------------|:-----------------------------------------------|:---------------------|
-| [Primary Source](#primary-source) | clean | All | All | None | True | True | True |
-| [Primary Source](#primary-source) | avx2 | x86\_64 | Linux,Darwin | AVX2,POPCNT | False | True | True |
+| [Primary Source](#primary-source) | clean | All | All | None | False | False | True |
+| [Primary Source](#primary-source) | avx2 | x86\_64 | Linux,Darwin | AVX2,POPCNT | False | False | True |
Are implementations chosen based on runtime CPU feature detection? **Yes**.
@@ -118,8 +118,8 @@ Are implementations chosen based on runtime CPU feature detection? **Yes**.
| Implementation source | Identifier in upstream | Supported architecture(s) | Supported operating system(s) | CPU extension(s) used | No branching-on-secrets claimed? | No branching-on-secrets checked by valgrind? | Large stack usage? |
|:---------------------------------:|:-------------------------|:----------------------------|:--------------------------------|:------------------------|:-----------------------------------|:-----------------------------------------------|:---------------------|
-| [Primary Source](#primary-source) | clean | All | All | None | True | True | True |
-| [Primary Source](#primary-source) | avx2 | x86\_64 | Linux,Darwin | AVX2,POPCNT,BMI1 | False | True | True |
+| [Primary Source](#primary-source) | clean | All | All | None | False | False | True |
+| [Primary Source](#primary-source) | avx2 | x86\_64 | Linux,Darwin | AVX2,POPCNT,BMI1 | False | False | True |
Are implementations chosen based on runtime CPU feature detection? **Yes**.
diff --git a/docs/algorithms/kem/classic_mceliece.yml b/docs/algorithms/kem/classic_mceliece.yml
index a5fcf751fc..4916af115e 100644
--- a/docs/algorithms/kem/classic_mceliece.yml
+++ b/docs/algorithms/kem/classic_mceliece.yml
@@ -26,7 +26,9 @@ advisories:
building with ``clang`` using optimization level ``-O2`` and ``-O3``. Care is advised
when using the algorithm at higher optimization levels, and any other compiler and
architecture.
-- Current implementation of the algorithm may not be constant-time. Additionally, environment specific constant-time leaks may not be documented; please report potential constant-time leaks when found.
+- Current implementation of the algorithm may not be constant-time. Additionally,
+ environment specific constant-time leaks may not be documented; please report potential
+ constant-time leaks when found.
parameter-sets:
- name: Classic-McEliece-348864
claimed-nist-level: 1
diff --git a/docs/algorithms/kem/kyber.md b/docs/algorithms/kem/kyber.md
index 9279672346..8191dfbfe8 100644
--- a/docs/algorithms/kem/kyber.md
+++ b/docs/algorithms/kem/kyber.md
@@ -7,9 +7,9 @@
- **Authors' website**: https://pq-crystals.org/
- **Specification version**: NIST Round 3 submission.
- **Primary Source**:
- - **Source**: https://github.com/pq-crystals/kyber/commit/dda29cc63af721981ee2c831cf00822e69be3220 with copy_from_upstream patches
+ - **Source**: https://github.com/pq-crystals/kyber/commit/272125f6acc8e8b6850fd68ceb901a660ff48196 with copy_from_upstream patches
- **Implementation license (SPDX-Identifier)**: CC0-1.0 or Apache-2.0
-- **Optimized Implementation sources**: https://github.com/pq-crystals/kyber/commit/dda29cc63af721981ee2c831cf00822e69be3220 with copy_from_upstream patches
+- **Optimized Implementation sources**: https://github.com/pq-crystals/kyber/commit/272125f6acc8e8b6850fd68ceb901a660ff48196 with copy_from_upstream patches
- **pqclean-aarch64**:
- **Source**: https://github.com/PQClean/PQClean/commit/8e220a87308154d48fdfac40abbb191ac7fce06a with copy_from_upstream patches
- **Implementation license (SPDX-Identifier)**: CC0-1.0 and (CC0-1.0 or Apache-2.0) and (CC0-1.0 or MIT) and MIT
diff --git a/docs/algorithms/kem/kyber.yml b/docs/algorithms/kem/kyber.yml
index f3dc15ad76..096c5702a2 100644
--- a/docs/algorithms/kem/kyber.yml
+++ b/docs/algorithms/kem/kyber.yml
@@ -17,7 +17,7 @@ website: https://pq-crystals.org/
nist-round: 3
spec-version: NIST Round 3 submission
primary-upstream:
- source: https://github.com/pq-crystals/kyber/commit/dda29cc63af721981ee2c831cf00822e69be3220
+ source: https://github.com/pq-crystals/kyber/commit/272125f6acc8e8b6850fd68ceb901a660ff48196
with copy_from_upstream patches
spdx-license-identifier: CC0-1.0 or Apache-2.0
optimized-upstreams:
diff --git a/docs/algorithms/sig/falcon.md b/docs/algorithms/sig/falcon.md
index 101ffa9a98..08598e3b47 100644
--- a/docs/algorithms/sig/falcon.md
+++ b/docs/algorithms/sig/falcon.md
@@ -22,7 +22,7 @@
| Implementation source | Identifier in upstream | Supported architecture(s) | Supported operating system(s) | CPU extension(s) used | No branching-on-secrets claimed? | No branching-on-secrets checked by valgrind? | Large stack usage?‡ |
|:---------------------------------:|:-------------------------|:----------------------------|:--------------------------------|:------------------------|:-----------------------------------|:-----------------------------------------------|:----------------------|
-| [Primary Source](#primary-source) | clean | All | All | None | False | False | False |
+| [Primary Source](#primary-source) | clean | All | All | None | True | True | False |
| [Primary Source](#primary-source) | avx2 | x86\_64 | All | AVX2 | False | False | False |
| [Primary Source](#primary-source) | aarch64 | ARM64\_V8 | Linux,Darwin | None | False | False | False |
@@ -34,7 +34,7 @@ Are implementations chosen based on runtime CPU feature detection? **Yes**.
| Implementation source | Identifier in upstream | Supported architecture(s) | Supported operating system(s) | CPU extension(s) used | No branching-on-secrets claimed? | No branching-on-secrets checked by valgrind? | Large stack usage? |
|:---------------------------------:|:-------------------------|:----------------------------|:--------------------------------|:------------------------|:-----------------------------------|:-----------------------------------------------|:---------------------|
-| [Primary Source](#primary-source) | clean | All | All | None | False | False | False |
+| [Primary Source](#primary-source) | clean | All | All | None | True | True | False |
| [Primary Source](#primary-source) | avx2 | x86\_64 | All | AVX2 | False | False | False |
| [Primary Source](#primary-source) | aarch64 | ARM64\_V8 | Linux,Darwin | None | False | False | False |
diff --git a/src/kem/kyber/pqcrystals-kyber_kyber1024_ref/poly.c b/src/kem/kyber/pqcrystals-kyber_kyber1024_ref/poly.c
index 017cacf5d6..3e73579e68 100644
--- a/src/kem/kyber/pqcrystals-kyber_kyber1024_ref/poly.c
+++ b/src/kem/kyber/pqcrystals-kyber_kyber1024_ref/poly.c
@@ -19,6 +19,7 @@ void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const poly *a)
{
unsigned int i,j;
int16_t u;
+ uint32_t d0;
uint8_t t[8];
#if (KYBER_POLYCOMPRESSEDBYTES == 128)
@@ -27,7 +28,12 @@ void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const poly *a)
// map to positive standard representatives
u = a->coeffs[8*i+j];
u += (u >> 15) & KYBER_Q;
- t[j] = ((((uint16_t)u << 4) + KYBER_Q/2)/KYBER_Q) & 15;
+/* Division-free rewrite (multiply by 80635, shift right by 28) of: t[j] = ((((uint16_t)u << 4) + KYBER_Q/2)/KYBER_Q) & 15; */
+ d0 = u << 4;
+ d0 += 1665;
+ d0 *= 80635;
+ d0 >>= 28;
+ t[j] = d0 & 0xf;
}
r[0] = t[0] | (t[1] << 4);
@@ -42,7 +48,12 @@ void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const poly *a)
// map to positive standard representatives
u = a->coeffs[8*i+j];
u += (u >> 15) & KYBER_Q;
- t[j] = ((((uint32_t)u << 5) + KYBER_Q/2)/KYBER_Q) & 31;
+/* Division-free rewrite (multiply by 40318, shift right by 27) of: t[j] = ((((uint32_t)u << 5) + KYBER_Q/2)/KYBER_Q) & 31; */
+ d0 = u << 5;
+ d0 += 1664;
+ d0 *= 40318;
+ d0 >>= 27;
+ t[j] = d0 & 0x1f;
}
r[0] = (t[0] >> 0) | (t[1] << 5);
diff --git a/src/kem/kyber/pqcrystals-kyber_kyber1024_ref/polyvec.c b/src/kem/kyber/pqcrystals-kyber_kyber1024_ref/polyvec.c
index 8420d069c2..669f6a5f1d 100644
--- a/src/kem/kyber/pqcrystals-kyber_kyber1024_ref/polyvec.c
+++ b/src/kem/kyber/pqcrystals-kyber_kyber1024_ref/polyvec.c
@@ -15,6 +15,7 @@
void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], const polyvec *a)
{
unsigned int i,j,k;
+ uint64_t d0;
#if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352))
uint16_t t[8];
@@ -23,7 +24,13 @@ void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], const polyvec *a)
for(k=0;k<8;k++) {
t[k] = a->vec[i].coeffs[8*j+k];
t[k] += ((int16_t)t[k] >> 15) & KYBER_Q;
- t[k] = ((((uint32_t)t[k] << 11) + KYBER_Q/2)/KYBER_Q) & 0x7ff;
+/* Division-free rewrite (multiply by 645084, shift right by 31) of: t[k] = ((((uint32_t)t[k] << 11) + KYBER_Q/2)/KYBER_Q) & 0x7ff; */
+ d0 = t[k];
+ d0 <<= 11;
+ d0 += 1664;
+ d0 *= 645084;
+ d0 >>= 31;
+ t[k] = d0 & 0x7ff;
}
r[ 0] = (t[0] >> 0);
@@ -47,7 +54,13 @@ void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], const polyvec *a)
for(k=0;k<4;k++) {
t[k] = a->vec[i].coeffs[4*j+k];
t[k] += ((int16_t)t[k] >> 15) & KYBER_Q;
- t[k] = ((((uint32_t)t[k] << 10) + KYBER_Q/2)/ KYBER_Q) & 0x3ff;
+/* Division-free rewrite (multiply by 1290167, shift right by 32) of: t[k] = ((((uint32_t)t[k] << 10) + KYBER_Q/2)/ KYBER_Q) & 0x3ff; */
+ d0 = t[k];
+ d0 <<= 10;
+ d0 += 1665;
+ d0 *= 1290167;
+ d0 >>= 32;
+ t[k] = d0 & 0x3ff;
}
r[0] = (t[0] >> 0);
diff --git a/src/kem/kyber/pqcrystals-kyber_kyber512_ref/poly.c b/src/kem/kyber/pqcrystals-kyber_kyber512_ref/poly.c
index 017cacf5d6..3e73579e68 100644
--- a/src/kem/kyber/pqcrystals-kyber_kyber512_ref/poly.c
+++ b/src/kem/kyber/pqcrystals-kyber_kyber512_ref/poly.c
@@ -19,6 +19,7 @@ void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const poly *a)
{
unsigned int i,j;
int16_t u;
+ uint32_t d0;
uint8_t t[8];
#if (KYBER_POLYCOMPRESSEDBYTES == 128)
@@ -27,7 +28,12 @@ void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const poly *a)
// map to positive standard representatives
u = a->coeffs[8*i+j];
u += (u >> 15) & KYBER_Q;
- t[j] = ((((uint16_t)u << 4) + KYBER_Q/2)/KYBER_Q) & 15;
+/* Division-free rewrite (multiply by 80635, shift right by 28) of: t[j] = ((((uint16_t)u << 4) + KYBER_Q/2)/KYBER_Q) & 15; */
+ d0 = u << 4;
+ d0 += 1665;
+ d0 *= 80635;
+ d0 >>= 28;
+ t[j] = d0 & 0xf;
}
r[0] = t[0] | (t[1] << 4);
@@ -42,7 +48,12 @@ void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const poly *a)
// map to positive standard representatives
u = a->coeffs[8*i+j];
u += (u >> 15) & KYBER_Q;
- t[j] = ((((uint32_t)u << 5) + KYBER_Q/2)/KYBER_Q) & 31;
+/* Division-free rewrite (multiply by 40318, shift right by 27) of: t[j] = ((((uint32_t)u << 5) + KYBER_Q/2)/KYBER_Q) & 31; */
+ d0 = u << 5;
+ d0 += 1664;
+ d0 *= 40318;
+ d0 >>= 27;
+ t[j] = d0 & 0x1f;
}
r[0] = (t[0] >> 0) | (t[1] << 5);
diff --git a/src/kem/kyber/pqcrystals-kyber_kyber512_ref/polyvec.c b/src/kem/kyber/pqcrystals-kyber_kyber512_ref/polyvec.c
index 8420d069c2..669f6a5f1d 100644
--- a/src/kem/kyber/pqcrystals-kyber_kyber512_ref/polyvec.c
+++ b/src/kem/kyber/pqcrystals-kyber_kyber512_ref/polyvec.c
@@ -15,6 +15,7 @@
void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], const polyvec *a)
{
unsigned int i,j,k;
+ uint64_t d0;
#if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352))
uint16_t t[8];
@@ -23,7 +24,13 @@ void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], const polyvec *a)
for(k=0;k<8;k++) {
t[k] = a->vec[i].coeffs[8*j+k];
t[k] += ((int16_t)t[k] >> 15) & KYBER_Q;
- t[k] = ((((uint32_t)t[k] << 11) + KYBER_Q/2)/KYBER_Q) & 0x7ff;
+/* Division-free rewrite (multiply by 645084, shift right by 31) of: t[k] = ((((uint32_t)t[k] << 11) + KYBER_Q/2)/KYBER_Q) & 0x7ff; */
+ d0 = t[k];
+ d0 <<= 11;
+ d0 += 1664;
+ d0 *= 645084;
+ d0 >>= 31;
+ t[k] = d0 & 0x7ff;
}
r[ 0] = (t[0] >> 0);
@@ -47,7 +54,13 @@ void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], const polyvec *a)
for(k=0;k<4;k++) {
t[k] = a->vec[i].coeffs[4*j+k];
t[k] += ((int16_t)t[k] >> 15) & KYBER_Q;
- t[k] = ((((uint32_t)t[k] << 10) + KYBER_Q/2)/ KYBER_Q) & 0x3ff;
+/* Division-free rewrite (multiply by 1290167, shift right by 32) of: t[k] = ((((uint32_t)t[k] << 10) + KYBER_Q/2)/ KYBER_Q) & 0x3ff; */
+ d0 = t[k];
+ d0 <<= 10;
+ d0 += 1665;
+ d0 *= 1290167;
+ d0 >>= 32;
+ t[k] = d0 & 0x3ff;
}
r[0] = (t[0] >> 0);
diff --git a/src/kem/kyber/pqcrystals-kyber_kyber768_ref/poly.c b/src/kem/kyber/pqcrystals-kyber_kyber768_ref/poly.c
index 017cacf5d6..3e73579e68 100644
--- a/src/kem/kyber/pqcrystals-kyber_kyber768_ref/poly.c
+++ b/src/kem/kyber/pqcrystals-kyber_kyber768_ref/poly.c
@@ -19,6 +19,7 @@ void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const poly *a)
{
unsigned int i,j;
int16_t u;
+ uint32_t d0;
uint8_t t[8];
#if (KYBER_POLYCOMPRESSEDBYTES == 128)
@@ -27,7 +28,12 @@ void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const poly *a)
// map to positive standard representatives
u = a->coeffs[8*i+j];
u += (u >> 15) & KYBER_Q;
- t[j] = ((((uint16_t)u << 4) + KYBER_Q/2)/KYBER_Q) & 15;
+/* Division-free rewrite (multiply by 80635, shift right by 28) of: t[j] = ((((uint16_t)u << 4) + KYBER_Q/2)/KYBER_Q) & 15; */
+ d0 = u << 4;
+ d0 += 1665;
+ d0 *= 80635;
+ d0 >>= 28;
+ t[j] = d0 & 0xf;
}
r[0] = t[0] | (t[1] << 4);
@@ -42,7 +48,12 @@ void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const poly *a)
// map to positive standard representatives
u = a->coeffs[8*i+j];
u += (u >> 15) & KYBER_Q;
- t[j] = ((((uint32_t)u << 5) + KYBER_Q/2)/KYBER_Q) & 31;
+/* Division-free rewrite (multiply by 40318, shift right by 27) of: t[j] = ((((uint32_t)u << 5) + KYBER_Q/2)/KYBER_Q) & 31; */
+ d0 = u << 5;
+ d0 += 1664;
+ d0 *= 40318;
+ d0 >>= 27;
+ t[j] = d0 & 0x1f;
}
r[0] = (t[0] >> 0) | (t[1] << 5);
diff --git a/src/kem/kyber/pqcrystals-kyber_kyber768_ref/polyvec.c b/src/kem/kyber/pqcrystals-kyber_kyber768_ref/polyvec.c
index 8420d069c2..669f6a5f1d 100644
--- a/src/kem/kyber/pqcrystals-kyber_kyber768_ref/polyvec.c
+++ b/src/kem/kyber/pqcrystals-kyber_kyber768_ref/polyvec.c
@@ -15,6 +15,7 @@
void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], const polyvec *a)
{
unsigned int i,j,k;
+ uint64_t d0;
#if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352))
uint16_t t[8];
@@ -23,7 +24,13 @@ void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], const polyvec *a)
for(k=0;k<8;k++) {
t[k] = a->vec[i].coeffs[8*j+k];
t[k] += ((int16_t)t[k] >> 15) & KYBER_Q;
- t[k] = ((((uint32_t)t[k] << 11) + KYBER_Q/2)/KYBER_Q) & 0x7ff;
+/* Division-free rewrite (multiply by 645084, shift right by 31) of: t[k] = ((((uint32_t)t[k] << 11) + KYBER_Q/2)/KYBER_Q) & 0x7ff; */
+ d0 = t[k];
+ d0 <<= 11;
+ d0 += 1664;
+ d0 *= 645084;
+ d0 >>= 31;
+ t[k] = d0 & 0x7ff;
}
r[ 0] = (t[0] >> 0);
@@ -47,7 +54,13 @@ void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], const polyvec *a)
for(k=0;k<4;k++) {
t[k] = a->vec[i].coeffs[4*j+k];
t[k] += ((int16_t)t[k] >> 15) & KYBER_Q;
- t[k] = ((((uint32_t)t[k] << 10) + KYBER_Q/2)/ KYBER_Q) & 0x3ff;
+/* Division-free rewrite (multiply by 1290167, shift right by 32) of: t[k] = ((((uint32_t)t[k] << 10) + KYBER_Q/2)/ KYBER_Q) & 0x3ff; */
+ d0 = t[k];
+ d0 <<= 10;
+ d0 += 1665;
+ d0 *= 1290167;
+ d0 >>= 32;
+ t[k] = d0 & 0x3ff;
}
r[0] = (t[0] >> 0);