From 84dcf8570020d6c9b785dad9b27ebb0fd9db60ed Mon Sep 17 00:00:00 2001 From: Pravek Sharma Date: Fri, 5 Jan 2024 16:10:29 +0100 Subject: [PATCH] Run copy_from_upstream.py -k --- docs/algorithms/kem/classic_mceliece.md | 42 +++++++++---------- docs/algorithms/kem/classic_mceliece.yml | 4 +- docs/algorithms/kem/kyber.md | 4 +- docs/algorithms/kem/kyber.yml | 2 +- docs/algorithms/sig/falcon.md | 4 +- .../pqcrystals-kyber_kyber1024_ref/poly.c | 15 ++++++- .../pqcrystals-kyber_kyber1024_ref/polyvec.c | 17 +++++++- .../pqcrystals-kyber_kyber512_ref/poly.c | 15 ++++++- .../pqcrystals-kyber_kyber512_ref/polyvec.c | 17 +++++++- .../pqcrystals-kyber_kyber768_ref/poly.c | 15 ++++++- .../pqcrystals-kyber_kyber768_ref/polyvec.c | 17 +++++++- 11 files changed, 113 insertions(+), 39 deletions(-) diff --git a/docs/algorithms/kem/classic_mceliece.md b/docs/algorithms/kem/classic_mceliece.md index b193065b5a..29c2d745e3 100644 --- a/docs/algorithms/kem/classic_mceliece.md +++ b/docs/algorithms/kem/classic_mceliece.md @@ -14,7 +14,7 @@ ## Advisories - Classic-McEliece-460896, Classic-McEliece-460896f, Classic-McEliece-6960119, and Classic-McEliece-6960119f parameter sets fail memory leak testing on x86-64 when building with ``clang`` using optimization level ``-O2`` and ``-O3``. Care is advised when using the algorithm at higher optimization levels, and any other compiler and architecture. -- Current implementation of the algorithm may not be constant-time. Additionally, environment specific constant-time leaks may not be documented; please report potential constant-time leaks when found. +- Current implementation of the algorithm may not be constant-time. Additionally, environment specific constant-time leaks may not be documented; please report potential constant-time leaks when found. ## Parameter set summary @@ -35,8 +35,8 @@ | Implementation source | Identifier in upstream | Supported architecture(s) | Supported operating system(s) | CPU extension(s) used | No branching-on-secrets claimed? | No branching-on-secrets checked by valgrind? | Large stack usage?‡ | |:---------------------------------:|:-------------------------|:----------------------------|:--------------------------------|:------------------------|:-----------------------------------|:-----------------------------------------------|:----------------------| -| [Primary Source](#primary-source) | clean | All | All | None | True | True | True | -| [Primary Source](#primary-source) | avx2 | x86\_64 | Linux,Darwin | AVX2,POPCNT | False | True | True | +| [Primary Source](#primary-source) | clean | All | All | None | False | False | True | +| [Primary Source](#primary-source) | avx2 | x86\_64 | Linux,Darwin | AVX2,POPCNT | False | False | True | Are implementations chosen based on runtime CPU feature detection? **Yes**. @@ -46,8 +46,8 @@ Are implementations chosen based on runtime CPU feature detection? **Yes**. | Implementation source | Identifier in upstream | Supported architecture(s) | Supported operating system(s) | CPU extension(s) used | No branching-on-secrets claimed? | No branching-on-secrets checked by valgrind? | Large stack usage? | |:---------------------------------:|:-------------------------|:----------------------------|:--------------------------------|:------------------------|:-----------------------------------|:-----------------------------------------------|:---------------------| -| [Primary Source](#primary-source) | clean | All | All | None | True | True | True | -| [Primary Source](#primary-source) | avx2 | x86\_64 | Linux,Darwin | AVX2,POPCNT,BMI1 | False | True | True | +| [Primary Source](#primary-source) | clean | All | All | None | False | False | True | +| [Primary Source](#primary-source) | avx2 | x86\_64 | Linux,Darwin | AVX2,POPCNT,BMI1 | False | False | True | Are implementations chosen based on runtime CPU feature detection? **Yes**. @@ -55,8 +55,8 @@ Are implementations chosen based on runtime CPU feature detection? **Yes**. | Implementation source | Identifier in upstream | Supported architecture(s) | Supported operating system(s) | CPU extension(s) used | No branching-on-secrets claimed? | No branching-on-secrets checked by valgrind? | Large stack usage? | |:---------------------------------:|:-------------------------|:----------------------------|:--------------------------------|:------------------------|:-----------------------------------|:-----------------------------------------------|:---------------------| -| [Primary Source](#primary-source) | clean | All | All | None | True | True | True | -| [Primary Source](#primary-source) | avx2 | x86\_64 | Linux,Darwin | AVX2,POPCNT | False | True | True | +| [Primary Source](#primary-source) | clean | All | All | None | False | False | True | +| [Primary Source](#primary-source) | avx2 | x86\_64 | Linux,Darwin | AVX2,POPCNT | False | False | True | Are implementations chosen based on runtime CPU feature detection? **Yes**. @@ -64,8 +64,8 @@ Are implementations chosen based on runtime CPU feature detection? **Yes**. | Implementation source | Identifier in upstream | Supported architecture(s) | Supported operating system(s) | CPU extension(s) used | No branching-on-secrets claimed? | No branching-on-secrets checked by valgrind? | Large stack usage? | |:---------------------------------:|:-------------------------|:----------------------------|:--------------------------------|:------------------------|:-----------------------------------|:-----------------------------------------------|:---------------------| -| [Primary Source](#primary-source) | clean | All | All | None | True | True | True | -| [Primary Source](#primary-source) | avx2 | x86\_64 | Linux,Darwin | AVX2,POPCNT,BMI1 | False | True | True | +| [Primary Source](#primary-source) | clean | All | All | None | False | False | True | +| [Primary Source](#primary-source) | avx2 | x86\_64 | Linux,Darwin | AVX2,POPCNT,BMI1 | False | False | True | Are implementations chosen based on runtime CPU feature detection? **Yes**. @@ -73,8 +73,8 @@ Are implementations chosen based on runtime CPU feature detection? **Yes**. | Implementation source | Identifier in upstream | Supported architecture(s) | Supported operating system(s) | CPU extension(s) used | No branching-on-secrets claimed? | No branching-on-secrets checked by valgrind? | Large stack usage? | |:---------------------------------:|:-------------------------|:----------------------------|:--------------------------------|:------------------------|:-----------------------------------|:-----------------------------------------------|:---------------------| -| [Primary Source](#primary-source) | clean | All | All | None | True | True | True | -| [Primary Source](#primary-source) | avx2 | x86\_64 | Linux,Darwin | AVX2,POPCNT | False | True | True | +| [Primary Source](#primary-source) | clean | All | All | None | False | False | True | +| [Primary Source](#primary-source) | avx2 | x86\_64 | Linux,Darwin | AVX2,POPCNT | False | False | True | Are implementations chosen based on runtime CPU feature detection? **Yes**. @@ -82,8 +82,8 @@ Are implementations chosen based on runtime CPU feature detection? **Yes**. | Implementation source | Identifier in upstream | Supported architecture(s) | Supported operating system(s) | CPU extension(s) used | No branching-on-secrets claimed? | No branching-on-secrets checked by valgrind? | Large stack usage? | |:---------------------------------:|:-------------------------|:----------------------------|:--------------------------------|:------------------------|:-----------------------------------|:-----------------------------------------------|:---------------------| -| [Primary Source](#primary-source) | clean | All | All | None | True | True | True | -| [Primary Source](#primary-source) | avx2 | x86\_64 | Linux,Darwin | AVX2,POPCNT,BMI1 | False | True | True | +| [Primary Source](#primary-source) | clean | All | All | None | False | False | True | +| [Primary Source](#primary-source) | avx2 | x86\_64 | Linux,Darwin | AVX2,POPCNT,BMI1 | False | False | True | Are implementations chosen based on runtime CPU feature detection? **Yes**. @@ -91,8 +91,8 @@ Are implementations chosen based on runtime CPU feature detection? **Yes**. | Implementation source | Identifier in upstream | Supported architecture(s) | Supported operating system(s) | CPU extension(s) used | No branching-on-secrets claimed? | No branching-on-secrets checked by valgrind? | Large stack usage? | |:---------------------------------:|:-------------------------|:----------------------------|:--------------------------------|:------------------------|:-----------------------------------|:-----------------------------------------------|:---------------------| -| [Primary Source](#primary-source) | clean | All | All | None | True | True | True | -| [Primary Source](#primary-source) | avx2 | x86\_64 | Linux,Darwin | AVX2,POPCNT | False | True | True | +| [Primary Source](#primary-source) | clean | All | All | None | False | False | True | +| [Primary Source](#primary-source) | avx2 | x86\_64 | Linux,Darwin | AVX2,POPCNT | False | False | True | Are implementations chosen based on runtime CPU feature detection? **Yes**. @@ -100,8 +100,8 @@ Are implementations chosen based on runtime CPU feature detection? **Yes**. | Implementation source | Identifier in upstream | Supported architecture(s) | Supported operating system(s) | CPU extension(s) used | No branching-on-secrets claimed? | No branching-on-secrets checked by valgrind? | Large stack usage? | |:---------------------------------:|:-------------------------|:----------------------------|:--------------------------------|:------------------------|:-----------------------------------|:-----------------------------------------------|:---------------------| -| [Primary Source](#primary-source) | clean | All | All | None | True | True | True | -| [Primary Source](#primary-source) | avx2 | x86\_64 | Linux,Darwin | AVX2,POPCNT,BMI1 | False | True | True | +| [Primary Source](#primary-source) | clean | All | All | None | False | False | True | +| [Primary Source](#primary-source) | avx2 | x86\_64 | Linux,Darwin | AVX2,POPCNT,BMI1 | False | False | True | Are implementations chosen based on runtime CPU feature detection? **Yes**. @@ -109,8 +109,8 @@ Are implementations chosen based on runtime CPU feature detection? **Yes**. | Implementation source | Identifier in upstream | Supported architecture(s) | Supported operating system(s) | CPU extension(s) used | No branching-on-secrets claimed? | No branching-on-secrets checked by valgrind? | Large stack usage? | |:---------------------------------:|:-------------------------|:----------------------------|:--------------------------------|:------------------------|:-----------------------------------|:-----------------------------------------------|:---------------------| -| [Primary Source](#primary-source) | clean | All | All | None | True | True | True | -| [Primary Source](#primary-source) | avx2 | x86\_64 | Linux,Darwin | AVX2,POPCNT | False | True | True | +| [Primary Source](#primary-source) | clean | All | All | None | False | False | True | +| [Primary Source](#primary-source) | avx2 | x86\_64 | Linux,Darwin | AVX2,POPCNT | False | False | True | Are implementations chosen based on runtime CPU feature detection? **Yes**. @@ -118,8 +118,8 @@ Are implementations chosen based on runtime CPU feature detection? **Yes**. | Implementation source | Identifier in upstream | Supported architecture(s) | Supported operating system(s) | CPU extension(s) used | No branching-on-secrets claimed? | No branching-on-secrets checked by valgrind? | Large stack usage? | |:---------------------------------:|:-------------------------|:----------------------------|:--------------------------------|:------------------------|:-----------------------------------|:-----------------------------------------------|:---------------------| -| [Primary Source](#primary-source) | clean | All | All | None | True | True | True | -| [Primary Source](#primary-source) | avx2 | x86\_64 | Linux,Darwin | AVX2,POPCNT,BMI1 | False | True | True | +| [Primary Source](#primary-source) | clean | All | All | None | False | False | True | +| [Primary Source](#primary-source) | avx2 | x86\_64 | Linux,Darwin | AVX2,POPCNT,BMI1 | False | False | True | Are implementations chosen based on runtime CPU feature detection? **Yes**. diff --git a/docs/algorithms/kem/classic_mceliece.yml b/docs/algorithms/kem/classic_mceliece.yml index a5fcf751fc..4916af115e 100644 --- a/docs/algorithms/kem/classic_mceliece.yml +++ b/docs/algorithms/kem/classic_mceliece.yml @@ -26,7 +26,9 @@ advisories: building with ``clang`` using optimization level ``-O2`` and ``-O3``. Care is advised when using the algorithm at higher optimization levels, and any other compiler and architecture. -- Current implementation of the algorithm may not be constant-time. Additionally, environment specific constant-time leaks may not be documented; please report potential constant-time leaks when found. +- Current implementation of the algorithm may not be constant-time. Additionally, + environment specific constant-time leaks may not be documented; please report potential + constant-time leaks when found. parameter-sets: - name: Classic-McEliece-348864 claimed-nist-level: 1 diff --git a/docs/algorithms/kem/kyber.md b/docs/algorithms/kem/kyber.md index 9279672346..8191dfbfe8 100644 --- a/docs/algorithms/kem/kyber.md +++ b/docs/algorithms/kem/kyber.md @@ -7,9 +7,9 @@ - **Authors' website**: https://pq-crystals.org/ - **Specification version**: NIST Round 3 submission. - **Primary Source**: - - **Source**: https://github.com/pq-crystals/kyber/commit/dda29cc63af721981ee2c831cf00822e69be3220 with copy_from_upstream patches + - **Source**: https://github.com/pq-crystals/kyber/commit/272125f6acc8e8b6850fd68ceb901a660ff48196 with copy_from_upstream patches - **Implementation license (SPDX-Identifier)**: CC0-1.0 or Apache-2.0 -- **Optimized Implementation sources**: https://github.com/pq-crystals/kyber/commit/dda29cc63af721981ee2c831cf00822e69be3220 with copy_from_upstream patches +- **Optimized Implementation sources**: https://github.com/pq-crystals/kyber/commit/272125f6acc8e8b6850fd68ceb901a660ff48196 with copy_from_upstream patches - **pqclean-aarch64**: - **Source**: https://github.com/PQClean/PQClean/commit/8e220a87308154d48fdfac40abbb191ac7fce06a with copy_from_upstream patches - **Implementation license (SPDX-Identifier)**: CC0-1.0 and (CC0-1.0 or Apache-2.0) and (CC0-1.0 or MIT) and MIT diff --git a/docs/algorithms/kem/kyber.yml b/docs/algorithms/kem/kyber.yml index f3dc15ad76..096c5702a2 100644 --- a/docs/algorithms/kem/kyber.yml +++ b/docs/algorithms/kem/kyber.yml @@ -17,7 +17,7 @@ website: https://pq-crystals.org/ nist-round: 3 spec-version: NIST Round 3 submission primary-upstream: - source: https://github.com/pq-crystals/kyber/commit/dda29cc63af721981ee2c831cf00822e69be3220 + source: https://github.com/pq-crystals/kyber/commit/272125f6acc8e8b6850fd68ceb901a660ff48196 with copy_from_upstream patches spdx-license-identifier: CC0-1.0 or Apache-2.0 optimized-upstreams: diff --git a/docs/algorithms/sig/falcon.md b/docs/algorithms/sig/falcon.md index 101ffa9a98..08598e3b47 100644 --- a/docs/algorithms/sig/falcon.md +++ b/docs/algorithms/sig/falcon.md @@ -22,7 +22,7 @@ | Implementation source | Identifier in upstream | Supported architecture(s) | Supported operating system(s) | CPU extension(s) used | No branching-on-secrets claimed? | No branching-on-secrets checked by valgrind? | Large stack usage?‡ | |:---------------------------------:|:-------------------------|:----------------------------|:--------------------------------|:------------------------|:-----------------------------------|:-----------------------------------------------|:----------------------| -| [Primary Source](#primary-source) | clean | All | All | None | False | False | False | +| [Primary Source](#primary-source) | clean | All | All | None | True | True | False | | [Primary Source](#primary-source) | avx2 | x86\_64 | All | AVX2 | False | False | False | | [Primary Source](#primary-source) | aarch64 | ARM64\_V8 | Linux,Darwin | None | False | False | False | @@ -34,7 +34,7 @@ Are implementations chosen based on runtime CPU feature detection? **Yes**. | Implementation source | Identifier in upstream | Supported architecture(s) | Supported operating system(s) | CPU extension(s) used | No branching-on-secrets claimed? | No branching-on-secrets checked by valgrind? | Large stack usage? | |:---------------------------------:|:-------------------------|:----------------------------|:--------------------------------|:------------------------|:-----------------------------------|:-----------------------------------------------|:---------------------| -| [Primary Source](#primary-source) | clean | All | All | None | False | False | False | +| [Primary Source](#primary-source) | clean | All | All | None | True | True | False | | [Primary Source](#primary-source) | avx2 | x86\_64 | All | AVX2 | False | False | False | | [Primary Source](#primary-source) | aarch64 | ARM64\_V8 | Linux,Darwin | None | False | False | False | diff --git a/src/kem/kyber/pqcrystals-kyber_kyber1024_ref/poly.c b/src/kem/kyber/pqcrystals-kyber_kyber1024_ref/poly.c index 017cacf5d6..3e73579e68 100644 --- a/src/kem/kyber/pqcrystals-kyber_kyber1024_ref/poly.c +++ b/src/kem/kyber/pqcrystals-kyber_kyber1024_ref/poly.c @@ -19,6 +19,7 @@ void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const poly *a) { unsigned int i,j; int16_t u; + uint32_t d0; uint8_t t[8]; #if (KYBER_POLYCOMPRESSEDBYTES == 128) @@ -27,7 +28,12 @@ void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const poly *a) // map to positive standard representatives u = a->coeffs[8*i+j]; u += (u >> 15) & KYBER_Q; - t[j] = ((((uint16_t)u << 4) + KYBER_Q/2)/KYBER_Q) & 15; +/* t[j] = ((((uint16_t)u << 4) + KYBER_Q/2)/KYBER_Q) & 15; */ + d0 = u << 4; + d0 += 1665; + d0 *= 80635; + d0 >>= 28; + t[j] = d0 & 0xf; } r[0] = t[0] | (t[1] << 4); @@ -42,7 +48,12 @@ void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const poly *a) // map to positive standard representatives u = a->coeffs[8*i+j]; u += (u >> 15) & KYBER_Q; - t[j] = ((((uint32_t)u << 5) + KYBER_Q/2)/KYBER_Q) & 31; +/* t[j] = ((((uint32_t)u << 5) + KYBER_Q/2)/KYBER_Q) & 31; */ + d0 = u << 5; + d0 += 1664; + d0 *= 40318; + d0 >>= 27; + t[j] = d0 & 0x1f; } r[0] = (t[0] >> 0) | (t[1] << 5); diff --git a/src/kem/kyber/pqcrystals-kyber_kyber1024_ref/polyvec.c b/src/kem/kyber/pqcrystals-kyber_kyber1024_ref/polyvec.c index 8420d069c2..669f6a5f1d 100644 --- a/src/kem/kyber/pqcrystals-kyber_kyber1024_ref/polyvec.c +++ b/src/kem/kyber/pqcrystals-kyber_kyber1024_ref/polyvec.c @@ -15,6 +15,7 @@ void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], const polyvec *a) { unsigned int i,j,k; + uint64_t d0; #if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352)) uint16_t t[8]; @@ -23,7 +24,13 @@ void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], const polyvec *a) for(k=0;k<8;k++) { t[k] = a->vec[i].coeffs[8*j+k]; t[k] += ((int16_t)t[k] >> 15) & KYBER_Q; - t[k] = ((((uint32_t)t[k] << 11) + KYBER_Q/2)/KYBER_Q) & 0x7ff; +/* t[k] = ((((uint32_t)t[k] << 11) + KYBER_Q/2)/KYBER_Q) & 0x7ff; */ + d0 = t[k]; + d0 <<= 11; + d0 += 1664; + d0 *= 645084; + d0 >>= 31; + t[k] = d0 & 0x7ff; } r[ 0] = (t[0] >> 0); @@ -47,7 +54,13 @@ void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], const polyvec *a) for(k=0;k<4;k++) { t[k] = a->vec[i].coeffs[4*j+k]; t[k] += ((int16_t)t[k] >> 15) & KYBER_Q; - t[k] = ((((uint32_t)t[k] << 10) + KYBER_Q/2)/ KYBER_Q) & 0x3ff; +/* t[k] = ((((uint32_t)t[k] << 10) + KYBER_Q/2)/ KYBER_Q) & 0x3ff; */ + d0 = t[k]; + d0 <<= 10; + d0 += 1665; + d0 *= 1290167; + d0 >>= 32; + t[k] = d0 & 0x3ff; } r[0] = (t[0] >> 0); diff --git a/src/kem/kyber/pqcrystals-kyber_kyber512_ref/poly.c b/src/kem/kyber/pqcrystals-kyber_kyber512_ref/poly.c index 017cacf5d6..3e73579e68 100644 --- a/src/kem/kyber/pqcrystals-kyber_kyber512_ref/poly.c +++ b/src/kem/kyber/pqcrystals-kyber_kyber512_ref/poly.c @@ -19,6 +19,7 @@ void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const poly *a) { unsigned int i,j; int16_t u; + uint32_t d0; uint8_t t[8]; #if (KYBER_POLYCOMPRESSEDBYTES == 128) @@ -27,7 +28,12 @@ void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const poly *a) // map to positive standard representatives u = a->coeffs[8*i+j]; u += (u >> 15) & KYBER_Q; - t[j] = ((((uint16_t)u << 4) + KYBER_Q/2)/KYBER_Q) & 15; +/* t[j] = ((((uint16_t)u << 4) + KYBER_Q/2)/KYBER_Q) & 15; */ + d0 = u << 4; + d0 += 1665; + d0 *= 80635; + d0 >>= 28; + t[j] = d0 & 0xf; } r[0] = t[0] | (t[1] << 4); @@ -42,7 +48,12 @@ void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const poly *a) // map to positive standard representatives u = a->coeffs[8*i+j]; u += (u >> 15) & KYBER_Q; - t[j] = ((((uint32_t)u << 5) + KYBER_Q/2)/KYBER_Q) & 31; +/* t[j] = ((((uint32_t)u << 5) + KYBER_Q/2)/KYBER_Q) & 31; */ + d0 = u << 5; + d0 += 1664; + d0 *= 40318; + d0 >>= 27; + t[j] = d0 & 0x1f; } r[0] = (t[0] >> 0) | (t[1] << 5); diff --git a/src/kem/kyber/pqcrystals-kyber_kyber512_ref/polyvec.c b/src/kem/kyber/pqcrystals-kyber_kyber512_ref/polyvec.c index 8420d069c2..669f6a5f1d 100644 --- a/src/kem/kyber/pqcrystals-kyber_kyber512_ref/polyvec.c +++ b/src/kem/kyber/pqcrystals-kyber_kyber512_ref/polyvec.c @@ -15,6 +15,7 @@ void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], const polyvec *a) { unsigned int i,j,k; + uint64_t d0; #if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352)) uint16_t t[8]; @@ -23,7 +24,13 @@ void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], const polyvec *a) for(k=0;k<8;k++) { t[k] = a->vec[i].coeffs[8*j+k]; t[k] += ((int16_t)t[k] >> 15) & KYBER_Q; - t[k] = ((((uint32_t)t[k] << 11) + KYBER_Q/2)/KYBER_Q) & 0x7ff; +/* t[k] = ((((uint32_t)t[k] << 11) + KYBER_Q/2)/KYBER_Q) & 0x7ff; */ + d0 = t[k]; + d0 <<= 11; + d0 += 1664; + d0 *= 645084; + d0 >>= 31; + t[k] = d0 & 0x7ff; } r[ 0] = (t[0] >> 0); @@ -47,7 +54,13 @@ void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], const polyvec *a) for(k=0;k<4;k++) { t[k] = a->vec[i].coeffs[4*j+k]; t[k] += ((int16_t)t[k] >> 15) & KYBER_Q; - t[k] = ((((uint32_t)t[k] << 10) + KYBER_Q/2)/ KYBER_Q) & 0x3ff; +/* t[k] = ((((uint32_t)t[k] << 10) + KYBER_Q/2)/ KYBER_Q) & 0x3ff; */ + d0 = t[k]; + d0 <<= 10; + d0 += 1665; + d0 *= 1290167; + d0 >>= 32; + t[k] = d0 & 0x3ff; } r[0] = (t[0] >> 0); diff --git a/src/kem/kyber/pqcrystals-kyber_kyber768_ref/poly.c b/src/kem/kyber/pqcrystals-kyber_kyber768_ref/poly.c index 017cacf5d6..3e73579e68 100644 --- a/src/kem/kyber/pqcrystals-kyber_kyber768_ref/poly.c +++ b/src/kem/kyber/pqcrystals-kyber_kyber768_ref/poly.c @@ -19,6 +19,7 @@ void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const poly *a) { unsigned int i,j; int16_t u; + uint32_t d0; uint8_t t[8]; #if (KYBER_POLYCOMPRESSEDBYTES == 128) @@ -27,7 +28,12 @@ void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const poly *a) // map to positive standard representatives u = a->coeffs[8*i+j]; u += (u >> 15) & KYBER_Q; - t[j] = ((((uint16_t)u << 4) + KYBER_Q/2)/KYBER_Q) & 15; +/* t[j] = ((((uint16_t)u << 4) + KYBER_Q/2)/KYBER_Q) & 15; */ + d0 = u << 4; + d0 += 1665; + d0 *= 80635; + d0 >>= 28; + t[j] = d0 & 0xf; } r[0] = t[0] | (t[1] << 4); @@ -42,7 +48,12 @@ void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const poly *a) // map to positive standard representatives u = a->coeffs[8*i+j]; u += (u >> 15) & KYBER_Q; - t[j] = ((((uint32_t)u << 5) + KYBER_Q/2)/KYBER_Q) & 31; +/* t[j] = ((((uint32_t)u << 5) + KYBER_Q/2)/KYBER_Q) & 31; */ + d0 = u << 5; + d0 += 1664; + d0 *= 40318; + d0 >>= 27; + t[j] = d0 & 0x1f; } r[0] = (t[0] >> 0) | (t[1] << 5); diff --git a/src/kem/kyber/pqcrystals-kyber_kyber768_ref/polyvec.c b/src/kem/kyber/pqcrystals-kyber_kyber768_ref/polyvec.c index 8420d069c2..669f6a5f1d 100644 --- a/src/kem/kyber/pqcrystals-kyber_kyber768_ref/polyvec.c +++ b/src/kem/kyber/pqcrystals-kyber_kyber768_ref/polyvec.c @@ -15,6 +15,7 @@ void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], const polyvec *a) { unsigned int i,j,k; + uint64_t d0; #if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352)) uint16_t t[8]; @@ -23,7 +24,13 @@ void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], const polyvec *a) for(k=0;k<8;k++) { t[k] = a->vec[i].coeffs[8*j+k]; t[k] += ((int16_t)t[k] >> 15) & KYBER_Q; - t[k] = ((((uint32_t)t[k] << 11) + KYBER_Q/2)/KYBER_Q) & 0x7ff; +/* t[k] = ((((uint32_t)t[k] << 11) + KYBER_Q/2)/KYBER_Q) & 0x7ff; */ + d0 = t[k]; + d0 <<= 11; + d0 += 1664; + d0 *= 645084; + d0 >>= 31; + t[k] = d0 & 0x7ff; } r[ 0] = (t[0] >> 0); @@ -47,7 +54,13 @@ void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], const polyvec *a) for(k=0;k<4;k++) { t[k] = a->vec[i].coeffs[4*j+k]; t[k] += ((int16_t)t[k] >> 15) & KYBER_Q; - t[k] = ((((uint32_t)t[k] << 10) + KYBER_Q/2)/ KYBER_Q) & 0x3ff; +/* t[k] = ((((uint32_t)t[k] << 10) + KYBER_Q/2)/ KYBER_Q) & 0x3ff; */ + d0 = t[k]; + d0 <<= 10; + d0 += 1665; + d0 *= 1290167; + d0 >>= 32; + t[k] = d0 & 0x3ff; } r[0] = (t[0] >> 0);