From f31686be40739f80750e8d7f1f277e8f1ecb6a99 Mon Sep 17 00:00:00 2001 From: Pravek Sharma Date: Tue, 2 Jan 2024 21:27:33 +0100 Subject: [PATCH 1/6] Update copy_from_upstream.yml --- scripts/copy_from_upstream/copy_from_upstream.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/copy_from_upstream/copy_from_upstream.yml b/scripts/copy_from_upstream/copy_from_upstream.yml index 7d6b4228df..783f73dcec 100644 --- a/scripts/copy_from_upstream/copy_from_upstream.yml +++ b/scripts/copy_from_upstream/copy_from_upstream.yml @@ -25,7 +25,7 @@ upstreams: name: pqcrystals-kyber git_url: https://github.com/pq-crystals/kyber.git git_branch: master - git_commit: dda29cc63af721981ee2c831cf00822e69be3220 + git_commit: 272125f6acc8e8b6850fd68ceb901a660ff48196 kem_meta_path: '{pretty_name_full}_META.yml' kem_scheme_path: '.' patches: [pqcrystals-kyber-yml.patch, pqcrystals-kyber-ref-shake-aes.patch, pqcrystals-kyber-avx2-shake-aes.patch] From e98391903b7e90b2b56fd5e197ecebfae0a596da Mon Sep 17 00:00:00 2001 From: Pravek Sharma Date: Tue, 2 Jan 2024 21:30:55 +0100 Subject: [PATCH 2/6] Run copy_from_upstream.py --- docs/algorithms/kem/kyber.md | 4 ++-- docs/algorithms/kem/kyber.yml | 2 +- .../kyber/pqcrystals-kyber_kyber1024_ref/poly.c | 15 +++++++++++++-- .../pqcrystals-kyber_kyber1024_ref/polyvec.c | 17 +++++++++++++++-- .../kyber/pqcrystals-kyber_kyber512_ref/poly.c | 15 +++++++++++++-- .../pqcrystals-kyber_kyber512_ref/polyvec.c | 17 +++++++++++++++-- .../kyber/pqcrystals-kyber_kyber768_ref/poly.c | 15 +++++++++++++-- .../pqcrystals-kyber_kyber768_ref/polyvec.c | 17 +++++++++++++++-- 8 files changed, 87 insertions(+), 15 deletions(-) diff --git a/docs/algorithms/kem/kyber.md b/docs/algorithms/kem/kyber.md index 2666f70853..6a6dc4afdc 100644 --- a/docs/algorithms/kem/kyber.md +++ b/docs/algorithms/kem/kyber.md @@ -7,9 +7,9 @@ - **Authors' website**: https://pq-crystals.org/ - **Specification version**: NIST Round 3 submission. 
- **Primary Source**: - - **Source**: https://github.com/pq-crystals/kyber/commit/dda29cc63af721981ee2c831cf00822e69be3220 with copy_from_upstream patches + - **Source**: https://github.com/pq-crystals/kyber/commit/272125f6acc8e8b6850fd68ceb901a660ff48196 with copy_from_upstream patches - **Implementation license (SPDX-Identifier)**: CC0-1.0 or Apache-2.0 -- **Optimized Implementation sources**: https://github.com/pq-crystals/kyber/commit/dda29cc63af721981ee2c831cf00822e69be3220 with copy_from_upstream patches +- **Optimized Implementation sources**: https://github.com/pq-crystals/kyber/commit/272125f6acc8e8b6850fd68ceb901a660ff48196 with copy_from_upstream patches - **oldpqclean-aarch64**: - **Source**: https://github.com/PQClean/PQClean/commit/8e220a87308154d48fdfac40abbb191ac7fce06a with copy_from_upstream patches - **Implementation license (SPDX-Identifier)**: CC0-1.0 and (CC0-1.0 or Apache-2.0) and (CC0-1.0 or MIT) and MIT diff --git a/docs/algorithms/kem/kyber.yml b/docs/algorithms/kem/kyber.yml index a04c44df6c..6cdf341327 100644 --- a/docs/algorithms/kem/kyber.yml +++ b/docs/algorithms/kem/kyber.yml @@ -17,7 +17,7 @@ website: https://pq-crystals.org/ nist-round: 3 spec-version: NIST Round 3 submission primary-upstream: - source: https://github.com/pq-crystals/kyber/commit/dda29cc63af721981ee2c831cf00822e69be3220 + source: https://github.com/pq-crystals/kyber/commit/272125f6acc8e8b6850fd68ceb901a660ff48196 with copy_from_upstream patches spdx-license-identifier: CC0-1.0 or Apache-2.0 optimized-upstreams: diff --git a/src/kem/kyber/pqcrystals-kyber_kyber1024_ref/poly.c b/src/kem/kyber/pqcrystals-kyber_kyber1024_ref/poly.c index 017cacf5d6..3e73579e68 100644 --- a/src/kem/kyber/pqcrystals-kyber_kyber1024_ref/poly.c +++ b/src/kem/kyber/pqcrystals-kyber_kyber1024_ref/poly.c @@ -19,6 +19,7 @@ void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const poly *a) { unsigned int i,j; int16_t u; + uint32_t d0; uint8_t t[8]; #if (KYBER_POLYCOMPRESSEDBYTES == 128) @@ 
-27,7 +28,12 @@ void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const poly *a) // map to positive standard representatives u = a->coeffs[8*i+j]; u += (u >> 15) & KYBER_Q; - t[j] = ((((uint16_t)u << 4) + KYBER_Q/2)/KYBER_Q) & 15; +/* t[j] = ((((uint16_t)u << 4) + KYBER_Q/2)/KYBER_Q) & 15; */ + d0 = u << 4; + d0 += 1665; + d0 *= 80635; + d0 >>= 28; + t[j] = d0 & 0xf; } r[0] = t[0] | (t[1] << 4); @@ -42,7 +48,12 @@ void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const poly *a) // map to positive standard representatives u = a->coeffs[8*i+j]; u += (u >> 15) & KYBER_Q; - t[j] = ((((uint32_t)u << 5) + KYBER_Q/2)/KYBER_Q) & 31; +/* t[j] = ((((uint32_t)u << 5) + KYBER_Q/2)/KYBER_Q) & 31; */ + d0 = u << 5; + d0 += 1664; + d0 *= 40318; + d0 >>= 27; + t[j] = d0 & 0x1f; } r[0] = (t[0] >> 0) | (t[1] << 5); diff --git a/src/kem/kyber/pqcrystals-kyber_kyber1024_ref/polyvec.c b/src/kem/kyber/pqcrystals-kyber_kyber1024_ref/polyvec.c index 8420d069c2..669f6a5f1d 100644 --- a/src/kem/kyber/pqcrystals-kyber_kyber1024_ref/polyvec.c +++ b/src/kem/kyber/pqcrystals-kyber_kyber1024_ref/polyvec.c @@ -15,6 +15,7 @@ void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], const polyvec *a) { unsigned int i,j,k; + uint64_t d0; #if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352)) uint16_t t[8]; @@ -23,7 +24,13 @@ void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], const polyvec *a) for(k=0;k<8;k++) { t[k] = a->vec[i].coeffs[8*j+k]; t[k] += ((int16_t)t[k] >> 15) & KYBER_Q; - t[k] = ((((uint32_t)t[k] << 11) + KYBER_Q/2)/KYBER_Q) & 0x7ff; +/* t[k] = ((((uint32_t)t[k] << 11) + KYBER_Q/2)/KYBER_Q) & 0x7ff; */ + d0 = t[k]; + d0 <<= 11; + d0 += 1664; + d0 *= 645084; + d0 >>= 31; + t[k] = d0 & 0x7ff; } r[ 0] = (t[0] >> 0); @@ -47,7 +54,13 @@ void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], const polyvec *a) for(k=0;k<4;k++) { t[k] = a->vec[i].coeffs[4*j+k]; t[k] += ((int16_t)t[k] >> 15) & KYBER_Q; - t[k] = ((((uint32_t)t[k] << 10) + KYBER_Q/2)/ KYBER_Q) 
& 0x3ff; +/* t[k] = ((((uint32_t)t[k] << 10) + KYBER_Q/2)/ KYBER_Q) & 0x3ff; */ + d0 = t[k]; + d0 <<= 10; + d0 += 1665; + d0 *= 1290167; + d0 >>= 32; + t[k] = d0 & 0x3ff; } r[0] = (t[0] >> 0); diff --git a/src/kem/kyber/pqcrystals-kyber_kyber512_ref/poly.c b/src/kem/kyber/pqcrystals-kyber_kyber512_ref/poly.c index 017cacf5d6..3e73579e68 100644 --- a/src/kem/kyber/pqcrystals-kyber_kyber512_ref/poly.c +++ b/src/kem/kyber/pqcrystals-kyber_kyber512_ref/poly.c @@ -19,6 +19,7 @@ void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const poly *a) { unsigned int i,j; int16_t u; + uint32_t d0; uint8_t t[8]; #if (KYBER_POLYCOMPRESSEDBYTES == 128) @@ -27,7 +28,12 @@ void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const poly *a) // map to positive standard representatives u = a->coeffs[8*i+j]; u += (u >> 15) & KYBER_Q; - t[j] = ((((uint16_t)u << 4) + KYBER_Q/2)/KYBER_Q) & 15; +/* t[j] = ((((uint16_t)u << 4) + KYBER_Q/2)/KYBER_Q) & 15; */ + d0 = u << 4; + d0 += 1665; + d0 *= 80635; + d0 >>= 28; + t[j] = d0 & 0xf; } r[0] = t[0] | (t[1] << 4); @@ -42,7 +48,12 @@ void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const poly *a) // map to positive standard representatives u = a->coeffs[8*i+j]; u += (u >> 15) & KYBER_Q; - t[j] = ((((uint32_t)u << 5) + KYBER_Q/2)/KYBER_Q) & 31; +/* t[j] = ((((uint32_t)u << 5) + KYBER_Q/2)/KYBER_Q) & 31; */ + d0 = u << 5; + d0 += 1664; + d0 *= 40318; + d0 >>= 27; + t[j] = d0 & 0x1f; } r[0] = (t[0] >> 0) | (t[1] << 5); diff --git a/src/kem/kyber/pqcrystals-kyber_kyber512_ref/polyvec.c b/src/kem/kyber/pqcrystals-kyber_kyber512_ref/polyvec.c index 8420d069c2..669f6a5f1d 100644 --- a/src/kem/kyber/pqcrystals-kyber_kyber512_ref/polyvec.c +++ b/src/kem/kyber/pqcrystals-kyber_kyber512_ref/polyvec.c @@ -15,6 +15,7 @@ void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], const polyvec *a) { unsigned int i,j,k; + uint64_t d0; #if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352)) uint16_t t[8]; @@ -23,7 +24,13 @@ void 
polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], const polyvec *a) for(k=0;k<8;k++) { t[k] = a->vec[i].coeffs[8*j+k]; t[k] += ((int16_t)t[k] >> 15) & KYBER_Q; - t[k] = ((((uint32_t)t[k] << 11) + KYBER_Q/2)/KYBER_Q) & 0x7ff; +/* t[k] = ((((uint32_t)t[k] << 11) + KYBER_Q/2)/KYBER_Q) & 0x7ff; */ + d0 = t[k]; + d0 <<= 11; + d0 += 1664; + d0 *= 645084; + d0 >>= 31; + t[k] = d0 & 0x7ff; } r[ 0] = (t[0] >> 0); @@ -47,7 +54,13 @@ void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], const polyvec *a) for(k=0;k<4;k++) { t[k] = a->vec[i].coeffs[4*j+k]; t[k] += ((int16_t)t[k] >> 15) & KYBER_Q; - t[k] = ((((uint32_t)t[k] << 10) + KYBER_Q/2)/ KYBER_Q) & 0x3ff; +/* t[k] = ((((uint32_t)t[k] << 10) + KYBER_Q/2)/ KYBER_Q) & 0x3ff; */ + d0 = t[k]; + d0 <<= 10; + d0 += 1665; + d0 *= 1290167; + d0 >>= 32; + t[k] = d0 & 0x3ff; } r[0] = (t[0] >> 0); diff --git a/src/kem/kyber/pqcrystals-kyber_kyber768_ref/poly.c b/src/kem/kyber/pqcrystals-kyber_kyber768_ref/poly.c index 017cacf5d6..3e73579e68 100644 --- a/src/kem/kyber/pqcrystals-kyber_kyber768_ref/poly.c +++ b/src/kem/kyber/pqcrystals-kyber_kyber768_ref/poly.c @@ -19,6 +19,7 @@ void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const poly *a) { unsigned int i,j; int16_t u; + uint32_t d0; uint8_t t[8]; #if (KYBER_POLYCOMPRESSEDBYTES == 128) @@ -27,7 +28,12 @@ void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const poly *a) // map to positive standard representatives u = a->coeffs[8*i+j]; u += (u >> 15) & KYBER_Q; - t[j] = ((((uint16_t)u << 4) + KYBER_Q/2)/KYBER_Q) & 15; +/* t[j] = ((((uint16_t)u << 4) + KYBER_Q/2)/KYBER_Q) & 15; */ + d0 = u << 4; + d0 += 1665; + d0 *= 80635; + d0 >>= 28; + t[j] = d0 & 0xf; } r[0] = t[0] | (t[1] << 4); @@ -42,7 +48,12 @@ void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const poly *a) // map to positive standard representatives u = a->coeffs[8*i+j]; u += (u >> 15) & KYBER_Q; - t[j] = ((((uint32_t)u << 5) + KYBER_Q/2)/KYBER_Q) & 31; +/* t[j] = ((((uint32_t)u << 5) + 
KYBER_Q/2)/KYBER_Q) & 31; */ + d0 = u << 5; + d0 += 1664; + d0 *= 40318; + d0 >>= 27; + t[j] = d0 & 0x1f; } r[0] = (t[0] >> 0) | (t[1] << 5); diff --git a/src/kem/kyber/pqcrystals-kyber_kyber768_ref/polyvec.c b/src/kem/kyber/pqcrystals-kyber_kyber768_ref/polyvec.c index 8420d069c2..669f6a5f1d 100644 --- a/src/kem/kyber/pqcrystals-kyber_kyber768_ref/polyvec.c +++ b/src/kem/kyber/pqcrystals-kyber_kyber768_ref/polyvec.c @@ -15,6 +15,7 @@ void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], const polyvec *a) { unsigned int i,j,k; + uint64_t d0; #if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352)) uint16_t t[8]; @@ -23,7 +24,13 @@ void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], const polyvec *a) for(k=0;k<8;k++) { t[k] = a->vec[i].coeffs[8*j+k]; t[k] += ((int16_t)t[k] >> 15) & KYBER_Q; - t[k] = ((((uint32_t)t[k] << 11) + KYBER_Q/2)/KYBER_Q) & 0x7ff; +/* t[k] = ((((uint32_t)t[k] << 11) + KYBER_Q/2)/KYBER_Q) & 0x7ff; */ + d0 = t[k]; + d0 <<= 11; + d0 += 1664; + d0 *= 645084; + d0 >>= 31; + t[k] = d0 & 0x7ff; } r[ 0] = (t[0] >> 0); @@ -47,7 +54,13 @@ void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], const polyvec *a) for(k=0;k<4;k++) { t[k] = a->vec[i].coeffs[4*j+k]; t[k] += ((int16_t)t[k] >> 15) & KYBER_Q; - t[k] = ((((uint32_t)t[k] << 10) + KYBER_Q/2)/ KYBER_Q) & 0x3ff; +/* t[k] = ((((uint32_t)t[k] << 10) + KYBER_Q/2)/ KYBER_Q) & 0x3ff; */ + d0 = t[k]; + d0 <<= 10; + d0 += 1665; + d0 *= 1290167; + d0 >>= 32; + t[k] = d0 & 0x3ff; } r[0] = (t[0] >> 0); From fd59f01de6d9239fe75fdb2d115bab1ca9438c09 Mon Sep 17 00:00:00 2001 From: Spencer Wilson Date: Tue, 2 Jan 2024 16:40:38 -0500 Subject: [PATCH 3/6] Update Kyber ARM patch to reflect pq-crystals/kyber@272125f --- ...an-kyber-armneon-variable-timing-fix.patch | 205 +++++++++++++++++- 1 file changed, 199 insertions(+), 6 deletions(-) diff --git a/scripts/copy_from_upstream/patches/pqclean-kyber-armneon-variable-timing-fix.patch 
b/scripts/copy_from_upstream/patches/pqclean-kyber-armneon-variable-timing-fix.patch index 071c58e243..fa856d2bee 100644 --- a/scripts/copy_from_upstream/patches/pqclean-kyber-armneon-variable-timing-fix.patch +++ b/scripts/copy_from_upstream/patches/pqclean-kyber-armneon-variable-timing-fix.patch @@ -1,8 +1,31 @@ +8f8d63fd708e00cc941ade03f405de21fdf17410 diff --git a/crypto_kem/kyber1024/aarch64/poly.c b/crypto_kem/kyber1024/aarch64/poly.c -index 1dfa52c..02e010b 100644 +index 1dfa52c..3115d1c 100644 --- a/crypto_kem/kyber1024/aarch64/poly.c +++ b/crypto_kem/kyber1024/aarch64/poly.c -@@ -207,14 +207,19 @@ void poly_frommsg(int16_t r[KYBER_N], const uint8_t msg[KYBER_INDCPA_MSGBYTES]) +@@ -51,6 +51,7 @@ + void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const int16_t a[KYBER_N]) { + unsigned int i, j; + int16_t u; ++ uint32_t d0; + uint8_t t[8]; + + for (i = 0; i < KYBER_N / 8; i++) { +@@ -58,7 +59,12 @@ void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const int16_t a[KYBER_N + // map to positive standard representatives + u = a[8 * i + j]; + u += (u >> 15) & KYBER_Q; +- t[j] = ((((uint32_t)u << 5) + KYBER_Q / 2) / KYBER_Q) & 31; ++ // t[j] = ((((uint32_t)u << 5) + KYBER_Q / 2) / KYBER_Q) & 31; ++ d0 = u << 5; ++ d0 += 1664; ++ d0 *= 40318; ++ d0 >>= 27; ++ t[j] = d0 & 0x1f; + } + + r[0] = (t[0] >> 0) | (t[1] << 5); +@@ -207,14 +213,19 @@ void poly_frommsg(int16_t r[KYBER_N], const uint8_t msg[KYBER_INDCPA_MSGBYTES]) **************************************************/ void poly_tomsg(uint8_t msg[KYBER_INDCPA_MSGBYTES], const int16_t a[KYBER_N]) { unsigned int i, j; @@ -25,11 +48,75 @@ index 1dfa52c..02e010b 100644 msg[i] |= t << j; } } +diff --git a/crypto_kem/kyber1024/aarch64/polyvec.c b/crypto_kem/kyber1024/aarch64/polyvec.c +index d400348..f9a1ebf 100644 +--- a/crypto_kem/kyber1024/aarch64/polyvec.c ++++ b/crypto_kem/kyber1024/aarch64/polyvec.c +@@ -21,6 +21,7 @@ + **************************************************/ + void 
polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], int16_t a[KYBER_K][KYBER_N]) { + unsigned int i, j, k; ++ uint64_t d0; + + #if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352)) + uint16_t t[8]; +@@ -29,7 +30,13 @@ void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], int16_t a[KYBER_K + for (k = 0; k < 8; k++) { + t[k] = a[i][8 * j + k]; + t[k] += ((int16_t)t[k] >> 15) & KYBER_Q; +- t[k] = ((((uint32_t)t[k] << 11) + KYBER_Q / 2) / KYBER_Q) & 0x7ff; ++ // t[k] = ((((uint32_t)t[k] << 11) + KYBER_Q / 2) / KYBER_Q) & 0x7ff; ++ d0 = t[k]; ++ d0 <<= 11; ++ d0 += 1664; ++ d0 *= 645084; ++ d0 >>= 31; ++ t[k] = d0 & 0x7ff; + } + + r[ 0] = (t[0] >> 0); +@@ -53,7 +60,13 @@ void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], int16_t a[KYBER_K + for (k = 0; k < 4; k++) { + t[k] = a[i][4 * j + k]; + t[k] += ((int16_t)t[k] >> 15) & KYBER_Q; +- t[k] = ((((uint32_t)t[k] << 10) + KYBER_Q / 2) / KYBER_Q) & 0x3ff; ++ // t[k] = ((((uint32_t)t[k] << 10) + KYBER_Q / 2) / KYBER_Q) & 0x3ff; ++ d0 = t[k]; ++ d0 <<= 10; ++ d0 += 1665; ++ d0 *= 1290167; ++ d0 >>= 32; ++ t[k] = d0 & 0x3ff; + } + + r[0] = (t[0] >> 0); diff --git a/crypto_kem/kyber512/aarch64/poly.c b/crypto_kem/kyber512/aarch64/poly.c -index dffc655..fcfcedd 100644 +index dffc655..361ce89 100644 --- a/crypto_kem/kyber512/aarch64/poly.c +++ b/crypto_kem/kyber512/aarch64/poly.c -@@ -194,14 +194,19 @@ void poly_frommsg(int16_t r[KYBER_N], const uint8_t msg[KYBER_INDCPA_MSGBYTES]) +@@ -51,6 +51,7 @@ + void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const int16_t a[KYBER_N]) { + unsigned int i, j; + int16_t u; ++ uint32_t d0; + uint8_t t[8]; + + for (i = 0; i < KYBER_N / 8; i++) { +@@ -58,7 +59,12 @@ void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const int16_t a[KYBER_N + // map to positive standard representatives + u = a[8 * i + j]; + u += (u >> 15) & KYBER_Q; +- t[j] = ((((uint16_t)u << 4) + KYBER_Q / 2) / KYBER_Q) & 15; ++ // t[j] = ((((uint16_t)u << 4) + KYBER_Q / 2) / KYBER_Q) & 
15; ++ d0 = u << 4; ++ d0 += 1665; ++ d0 *= 80635; ++ d0 >>= 28; ++ t[j] = d0 & 0xf; + } + + r[0] = t[0] | (t[1] << 4); +@@ -194,14 +200,19 @@ void poly_frommsg(int16_t r[KYBER_N], const uint8_t msg[KYBER_INDCPA_MSGBYTES]) **************************************************/ void poly_tomsg(uint8_t msg[KYBER_INDCPA_MSGBYTES], const int16_t a[KYBER_N]) { unsigned int i, j; @@ -52,11 +139,75 @@ index dffc655..fcfcedd 100644 msg[i] |= t << j; } } +diff --git a/crypto_kem/kyber512/aarch64/polyvec.c b/crypto_kem/kyber512/aarch64/polyvec.c +index d400348..f9a1ebf 100644 +--- a/crypto_kem/kyber512/aarch64/polyvec.c ++++ b/crypto_kem/kyber512/aarch64/polyvec.c +@@ -21,6 +21,7 @@ + **************************************************/ + void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], int16_t a[KYBER_K][KYBER_N]) { + unsigned int i, j, k; ++ uint64_t d0; + + #if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352)) + uint16_t t[8]; +@@ -29,7 +30,13 @@ void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], int16_t a[KYBER_K + for (k = 0; k < 8; k++) { + t[k] = a[i][8 * j + k]; + t[k] += ((int16_t)t[k] >> 15) & KYBER_Q; +- t[k] = ((((uint32_t)t[k] << 11) + KYBER_Q / 2) / KYBER_Q) & 0x7ff; ++ // t[k] = ((((uint32_t)t[k] << 11) + KYBER_Q / 2) / KYBER_Q) & 0x7ff; ++ d0 = t[k]; ++ d0 <<= 11; ++ d0 += 1664; ++ d0 *= 645084; ++ d0 >>= 31; ++ t[k] = d0 & 0x7ff; + } + + r[ 0] = (t[0] >> 0); +@@ -53,7 +60,13 @@ void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], int16_t a[KYBER_K + for (k = 0; k < 4; k++) { + t[k] = a[i][4 * j + k]; + t[k] += ((int16_t)t[k] >> 15) & KYBER_Q; +- t[k] = ((((uint32_t)t[k] << 10) + KYBER_Q / 2) / KYBER_Q) & 0x3ff; ++ // t[k] = ((((uint32_t)t[k] << 10) + KYBER_Q / 2) / KYBER_Q) & 0x3ff; ++ d0 = t[k]; ++ d0 <<= 10; ++ d0 += 1665; ++ d0 *= 1290167; ++ d0 >>= 32; ++ t[k] = d0 & 0x3ff; + } + + r[0] = (t[0] >> 0); diff --git a/crypto_kem/kyber768/aarch64/poly.c b/crypto_kem/kyber768/aarch64/poly.c -index dffc655..fcfcedd 100644 +index 
dffc655..361ce89 100644 --- a/crypto_kem/kyber768/aarch64/poly.c +++ b/crypto_kem/kyber768/aarch64/poly.c -@@ -194,14 +194,19 @@ void poly_frommsg(int16_t r[KYBER_N], const uint8_t msg[KYBER_INDCPA_MSGBYTES]) +@@ -51,6 +51,7 @@ + void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const int16_t a[KYBER_N]) { + unsigned int i, j; + int16_t u; ++ uint32_t d0; + uint8_t t[8]; + + for (i = 0; i < KYBER_N / 8; i++) { +@@ -58,7 +59,12 @@ void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const int16_t a[KYBER_N + // map to positive standard representatives + u = a[8 * i + j]; + u += (u >> 15) & KYBER_Q; +- t[j] = ((((uint16_t)u << 4) + KYBER_Q / 2) / KYBER_Q) & 15; ++ // t[j] = ((((uint16_t)u << 4) + KYBER_Q / 2) / KYBER_Q) & 15; ++ d0 = u << 4; ++ d0 += 1665; ++ d0 *= 80635; ++ d0 >>= 28; ++ t[j] = d0 & 0xf; + } + + r[0] = t[0] | (t[1] << 4); +@@ -194,14 +200,19 @@ void poly_frommsg(int16_t r[KYBER_N], const uint8_t msg[KYBER_INDCPA_MSGBYTES]) **************************************************/ void poly_tomsg(uint8_t msg[KYBER_INDCPA_MSGBYTES], const int16_t a[KYBER_N]) { unsigned int i, j; @@ -79,3 +230,45 @@ index dffc655..fcfcedd 100644 msg[i] |= t << j; } } +diff --git a/crypto_kem/kyber768/aarch64/polyvec.c b/crypto_kem/kyber768/aarch64/polyvec.c +index d400348..f9a1ebf 100644 +--- a/crypto_kem/kyber768/aarch64/polyvec.c ++++ b/crypto_kem/kyber768/aarch64/polyvec.c +@@ -21,6 +21,7 @@ + **************************************************/ + void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], int16_t a[KYBER_K][KYBER_N]) { + unsigned int i, j, k; ++ uint64_t d0; + + #if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352)) + uint16_t t[8]; +@@ -29,7 +30,13 @@ void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], int16_t a[KYBER_K + for (k = 0; k < 8; k++) { + t[k] = a[i][8 * j + k]; + t[k] += ((int16_t)t[k] >> 15) & KYBER_Q; +- t[k] = ((((uint32_t)t[k] << 11) + KYBER_Q / 2) / KYBER_Q) & 0x7ff; ++ // t[k] = ((((uint32_t)t[k] << 11) + KYBER_Q 
/ 2) / KYBER_Q) & 0x7ff; ++ d0 = t[k]; ++ d0 <<= 11; ++ d0 += 1664; ++ d0 *= 645084; ++ d0 >>= 31; ++ t[k] = d0 & 0x7ff; + } + + r[ 0] = (t[0] >> 0); +@@ -53,7 +60,13 @@ void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], int16_t a[KYBER_K + for (k = 0; k < 4; k++) { + t[k] = a[i][4 * j + k]; + t[k] += ((int16_t)t[k] >> 15) & KYBER_Q; +- t[k] = ((((uint32_t)t[k] << 10) + KYBER_Q / 2) / KYBER_Q) & 0x3ff; ++ // t[k] = ((((uint32_t)t[k] << 10) + KYBER_Q / 2) / KYBER_Q) & 0x3ff; ++ d0 = t[k]; ++ d0 <<= 10; ++ d0 += 1665; ++ d0 *= 1290167; ++ d0 >>= 32; ++ t[k] = d0 & 0x3ff; + } + + r[0] = (t[0] >> 0); From 774c3764e8b5d4758b59e9122a4fd469789d98ba Mon Sep 17 00:00:00 2001 From: Spencer Wilson Date: Tue, 2 Jan 2024 16:45:17 -0500 Subject: [PATCH 4/6] Run copy_from_upstream --- .../kyber/oldpqclean_kyber1024_aarch64/poly.c | 8 +++++++- .../oldpqclean_kyber1024_aarch64/polyvec.c | 17 +++++++++++++++-- .../kyber/oldpqclean_kyber512_aarch64/poly.c | 8 +++++++- .../kyber/oldpqclean_kyber512_aarch64/polyvec.c | 17 +++++++++++++++-- .../kyber/oldpqclean_kyber768_aarch64/poly.c | 8 +++++++- .../kyber/oldpqclean_kyber768_aarch64/polyvec.c | 17 +++++++++++++++-- 6 files changed, 66 insertions(+), 9 deletions(-) diff --git a/src/kem/kyber/oldpqclean_kyber1024_aarch64/poly.c b/src/kem/kyber/oldpqclean_kyber1024_aarch64/poly.c index 02e010b3d5..3115d1c135 100644 --- a/src/kem/kyber/oldpqclean_kyber1024_aarch64/poly.c +++ b/src/kem/kyber/oldpqclean_kyber1024_aarch64/poly.c @@ -51,6 +51,7 @@ void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const int16_t a[KYBER_N]) { unsigned int i, j; int16_t u; + uint32_t d0; uint8_t t[8]; for (i = 0; i < KYBER_N / 8; i++) { @@ -58,7 +59,12 @@ void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const int16_t a[KYBER_N // map to positive standard representatives u = a[8 * i + j]; u += (u >> 15) & KYBER_Q; - t[j] = ((((uint32_t)u << 5) + KYBER_Q / 2) / KYBER_Q) & 31; + // t[j] = ((((uint32_t)u << 5) + KYBER_Q / 2) / KYBER_Q) & 
31; + d0 = u << 5; + d0 += 1664; + d0 *= 40318; + d0 >>= 27; + t[j] = d0 & 0x1f; } r[0] = (t[0] >> 0) | (t[1] << 5); diff --git a/src/kem/kyber/oldpqclean_kyber1024_aarch64/polyvec.c b/src/kem/kyber/oldpqclean_kyber1024_aarch64/polyvec.c index d400348cbc..f9a1ebf152 100644 --- a/src/kem/kyber/oldpqclean_kyber1024_aarch64/polyvec.c +++ b/src/kem/kyber/oldpqclean_kyber1024_aarch64/polyvec.c @@ -21,6 +21,7 @@ **************************************************/ void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], int16_t a[KYBER_K][KYBER_N]) { unsigned int i, j, k; + uint64_t d0; #if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352)) uint16_t t[8]; @@ -29,7 +30,13 @@ void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], int16_t a[KYBER_K for (k = 0; k < 8; k++) { t[k] = a[i][8 * j + k]; t[k] += ((int16_t)t[k] >> 15) & KYBER_Q; - t[k] = ((((uint32_t)t[k] << 11) + KYBER_Q / 2) / KYBER_Q) & 0x7ff; + // t[k] = ((((uint32_t)t[k] << 11) + KYBER_Q / 2) / KYBER_Q) & 0x7ff; + d0 = t[k]; + d0 <<= 11; + d0 += 1664; + d0 *= 645084; + d0 >>= 31; + t[k] = d0 & 0x7ff; } r[ 0] = (t[0] >> 0); @@ -53,7 +60,13 @@ void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], int16_t a[KYBER_K for (k = 0; k < 4; k++) { t[k] = a[i][4 * j + k]; t[k] += ((int16_t)t[k] >> 15) & KYBER_Q; - t[k] = ((((uint32_t)t[k] << 10) + KYBER_Q / 2) / KYBER_Q) & 0x3ff; + // t[k] = ((((uint32_t)t[k] << 10) + KYBER_Q / 2) / KYBER_Q) & 0x3ff; + d0 = t[k]; + d0 <<= 10; + d0 += 1665; + d0 *= 1290167; + d0 >>= 32; + t[k] = d0 & 0x3ff; } r[0] = (t[0] >> 0); diff --git a/src/kem/kyber/oldpqclean_kyber512_aarch64/poly.c b/src/kem/kyber/oldpqclean_kyber512_aarch64/poly.c index fcfceddd83..361ce89d1c 100644 --- a/src/kem/kyber/oldpqclean_kyber512_aarch64/poly.c +++ b/src/kem/kyber/oldpqclean_kyber512_aarch64/poly.c @@ -51,6 +51,7 @@ void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const int16_t a[KYBER_N]) { unsigned int i, j; int16_t u; + uint32_t d0; uint8_t t[8]; for (i = 0; i < KYBER_N / 
8; i++) { @@ -58,7 +59,12 @@ void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const int16_t a[KYBER_N // map to positive standard representatives u = a[8 * i + j]; u += (u >> 15) & KYBER_Q; - t[j] = ((((uint16_t)u << 4) + KYBER_Q / 2) / KYBER_Q) & 15; + // t[j] = ((((uint16_t)u << 4) + KYBER_Q / 2) / KYBER_Q) & 15; + d0 = u << 4; + d0 += 1665; + d0 *= 80635; + d0 >>= 28; + t[j] = d0 & 0xf; } r[0] = t[0] | (t[1] << 4); diff --git a/src/kem/kyber/oldpqclean_kyber512_aarch64/polyvec.c b/src/kem/kyber/oldpqclean_kyber512_aarch64/polyvec.c index d400348cbc..f9a1ebf152 100644 --- a/src/kem/kyber/oldpqclean_kyber512_aarch64/polyvec.c +++ b/src/kem/kyber/oldpqclean_kyber512_aarch64/polyvec.c @@ -21,6 +21,7 @@ **************************************************/ void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], int16_t a[KYBER_K][KYBER_N]) { unsigned int i, j, k; + uint64_t d0; #if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352)) uint16_t t[8]; @@ -29,7 +30,13 @@ void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], int16_t a[KYBER_K for (k = 0; k < 8; k++) { t[k] = a[i][8 * j + k]; t[k] += ((int16_t)t[k] >> 15) & KYBER_Q; - t[k] = ((((uint32_t)t[k] << 11) + KYBER_Q / 2) / KYBER_Q) & 0x7ff; + // t[k] = ((((uint32_t)t[k] << 11) + KYBER_Q / 2) / KYBER_Q) & 0x7ff; + d0 = t[k]; + d0 <<= 11; + d0 += 1664; + d0 *= 645084; + d0 >>= 31; + t[k] = d0 & 0x7ff; } r[ 0] = (t[0] >> 0); @@ -53,7 +60,13 @@ void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], int16_t a[KYBER_K for (k = 0; k < 4; k++) { t[k] = a[i][4 * j + k]; t[k] += ((int16_t)t[k] >> 15) & KYBER_Q; - t[k] = ((((uint32_t)t[k] << 10) + KYBER_Q / 2) / KYBER_Q) & 0x3ff; + // t[k] = ((((uint32_t)t[k] << 10) + KYBER_Q / 2) / KYBER_Q) & 0x3ff; + d0 = t[k]; + d0 <<= 10; + d0 += 1665; + d0 *= 1290167; + d0 >>= 32; + t[k] = d0 & 0x3ff; } r[0] = (t[0] >> 0); diff --git a/src/kem/kyber/oldpqclean_kyber768_aarch64/poly.c b/src/kem/kyber/oldpqclean_kyber768_aarch64/poly.c index 
fcfceddd83..361ce89d1c 100644 --- a/src/kem/kyber/oldpqclean_kyber768_aarch64/poly.c +++ b/src/kem/kyber/oldpqclean_kyber768_aarch64/poly.c @@ -51,6 +51,7 @@ void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const int16_t a[KYBER_N]) { unsigned int i, j; int16_t u; + uint32_t d0; uint8_t t[8]; for (i = 0; i < KYBER_N / 8; i++) { @@ -58,7 +59,12 @@ void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const int16_t a[KYBER_N // map to positive standard representatives u = a[8 * i + j]; u += (u >> 15) & KYBER_Q; - t[j] = ((((uint16_t)u << 4) + KYBER_Q / 2) / KYBER_Q) & 15; + // t[j] = ((((uint16_t)u << 4) + KYBER_Q / 2) / KYBER_Q) & 15; + d0 = u << 4; + d0 += 1665; + d0 *= 80635; + d0 >>= 28; + t[j] = d0 & 0xf; } r[0] = t[0] | (t[1] << 4); diff --git a/src/kem/kyber/oldpqclean_kyber768_aarch64/polyvec.c b/src/kem/kyber/oldpqclean_kyber768_aarch64/polyvec.c index d400348cbc..f9a1ebf152 100644 --- a/src/kem/kyber/oldpqclean_kyber768_aarch64/polyvec.c +++ b/src/kem/kyber/oldpqclean_kyber768_aarch64/polyvec.c @@ -21,6 +21,7 @@ **************************************************/ void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], int16_t a[KYBER_K][KYBER_N]) { unsigned int i, j, k; + uint64_t d0; #if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352)) uint16_t t[8]; @@ -29,7 +30,13 @@ void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], int16_t a[KYBER_K for (k = 0; k < 8; k++) { t[k] = a[i][8 * j + k]; t[k] += ((int16_t)t[k] >> 15) & KYBER_Q; - t[k] = ((((uint32_t)t[k] << 11) + KYBER_Q / 2) / KYBER_Q) & 0x7ff; + // t[k] = ((((uint32_t)t[k] << 11) + KYBER_Q / 2) / KYBER_Q) & 0x7ff; + d0 = t[k]; + d0 <<= 11; + d0 += 1664; + d0 *= 645084; + d0 >>= 31; + t[k] = d0 & 0x7ff; } r[ 0] = (t[0] >> 0); @@ -53,7 +60,13 @@ void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], int16_t a[KYBER_K for (k = 0; k < 4; k++) { t[k] = a[i][4 * j + k]; t[k] += ((int16_t)t[k] >> 15) & KYBER_Q; - t[k] = ((((uint32_t)t[k] << 10) + KYBER_Q / 2) / KYBER_Q) & 
0x3ff; + // t[k] = ((((uint32_t)t[k] << 10) + KYBER_Q / 2) / KYBER_Q) & 0x3ff; + d0 = t[k]; + d0 <<= 10; + d0 += 1665; + d0 *= 1290167; + d0 >>= 32; + t[k] = d0 & 0x3ff; } r[0] = (t[0] >> 0); From 1d0ec8cc752c345c528a2fc6134a891ad3595182 Mon Sep 17 00:00:00 2001 From: Spencer Wilson Date: Wed, 3 Jan 2024 11:25:03 -0500 Subject: [PATCH 5/6] Update to most recent Kyber commit --- scripts/copy_from_upstream/copy_from_upstream.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/copy_from_upstream/copy_from_upstream.yml b/scripts/copy_from_upstream/copy_from_upstream.yml index 783f73dcec..8c1313ac1e 100644 --- a/scripts/copy_from_upstream/copy_from_upstream.yml +++ b/scripts/copy_from_upstream/copy_from_upstream.yml @@ -25,7 +25,7 @@ upstreams: name: pqcrystals-kyber git_url: https://github.com/pq-crystals/kyber.git git_branch: master - git_commit: 272125f6acc8e8b6850fd68ceb901a660ff48196 + git_commit: b628ba78711bc28327dc7d2d5c074a00f061884e kem_meta_path: '{pretty_name_full}_META.yml' kem_scheme_path: '.' patches: [pqcrystals-kyber-yml.patch, pqcrystals-kyber-ref-shake-aes.patch, pqcrystals-kyber-avx2-shake-aes.patch] From 0347b006269d95b2b21041d13422e2be4ba03ef1 Mon Sep 17 00:00:00 2001 From: Spencer Wilson Date: Wed, 3 Jan 2024 11:26:20 -0500 Subject: [PATCH 6/6] Run copy_from_upstream --- docs/algorithms/kem/kyber.md | 4 ++-- docs/algorithms/kem/kyber.yml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/algorithms/kem/kyber.md b/docs/algorithms/kem/kyber.md index 6a6dc4afdc..9f2ad1f85d 100644 --- a/docs/algorithms/kem/kyber.md +++ b/docs/algorithms/kem/kyber.md @@ -7,9 +7,9 @@ - **Authors' website**: https://pq-crystals.org/ - **Specification version**: NIST Round 3 submission. 
- **Primary Source**: - - **Source**: https://github.com/pq-crystals/kyber/commit/272125f6acc8e8b6850fd68ceb901a660ff48196 with copy_from_upstream patches + - **Source**: https://github.com/pq-crystals/kyber/commit/b628ba78711bc28327dc7d2d5c074a00f061884e with copy_from_upstream patches - **Implementation license (SPDX-Identifier)**: CC0-1.0 or Apache-2.0 -- **Optimized Implementation sources**: https://github.com/pq-crystals/kyber/commit/272125f6acc8e8b6850fd68ceb901a660ff48196 with copy_from_upstream patches +- **Optimized Implementation sources**: https://github.com/pq-crystals/kyber/commit/b628ba78711bc28327dc7d2d5c074a00f061884e with copy_from_upstream patches - **oldpqclean-aarch64**: - **Source**: https://github.com/PQClean/PQClean/commit/8e220a87308154d48fdfac40abbb191ac7fce06a with copy_from_upstream patches - **Implementation license (SPDX-Identifier)**: CC0-1.0 and (CC0-1.0 or Apache-2.0) and (CC0-1.0 or MIT) and MIT diff --git a/docs/algorithms/kem/kyber.yml b/docs/algorithms/kem/kyber.yml index 6cdf341327..cde870c10b 100644 --- a/docs/algorithms/kem/kyber.yml +++ b/docs/algorithms/kem/kyber.yml @@ -17,7 +17,7 @@ website: https://pq-crystals.org/ nist-round: 3 spec-version: NIST Round 3 submission primary-upstream: - source: https://github.com/pq-crystals/kyber/commit/272125f6acc8e8b6850fd68ceb901a660ff48196 + source: https://github.com/pq-crystals/kyber/commit/b628ba78711bc28327dc7d2d5c074a00f061884e with copy_from_upstream patches spdx-license-identifier: CC0-1.0 or Apache-2.0 optimized-upstreams: