From 3e7394e4bccdb1e0c89b562d74fe3cf5300da350 Mon Sep 17 00:00:00 2001 From: Manuel Barbosa <mbb@fc.up.pt> Date: Wed, 4 Dec 2024 13:44:13 +0000 Subject: [PATCH] spurious instruction --- .gitmodules | 3 + code/jasmin/mlkem_avx2/Makefile | 9 +- code/jasmin/mlkem_avx2/cbd.c | 128 --- code/jasmin/mlkem_avx2/cbd.h | 9 - code/jasmin/mlkem_avx2/compile.bench | 147 --- code/jasmin/mlkem_avx2/compile.bench.old | 147 --- code/jasmin/mlkem_avx2/consts.c | 153 ---- code/jasmin/mlkem_avx2/consts.h | 40 - code/jasmin/mlkem_avx2/fips202.c | 549 ------------ code/jasmin/mlkem_avx2/fips202.h | 28 - code/jasmin/mlkem_avx2/fq.S | 129 --- code/jasmin/mlkem_avx2/fq.inc | 26 - code/jasmin/mlkem_avx2/gen_matrix.jinc.try0 | 940 -------------------- code/jasmin/mlkem_avx2/indcpa.c | 320 ------- code/jasmin/mlkem_avx2/indcpa.h | 36 - code/jasmin/mlkem_avx2/kem.c | 145 --- code/jasmin/mlkem_avx2/kem.h | 41 - code/jasmin/mlkem_avx2/ntt.S | 198 ----- code/jasmin/mlkem_avx2/ntt.c | 152 ---- code/jasmin/mlkem_avx2/ntt.h | 45 - code/jasmin/mlkem_avx2/params.h | 50 -- code/jasmin/mlkem_avx2/poly.c | 378 -------- code/jasmin/mlkem_avx2/poly.h | 68 -- code/jasmin/mlkem_avx2/poly_ntt.c | 10 - code/jasmin/mlkem_avx2/polyvec.c | 237 ----- code/jasmin/mlkem_avx2/polyvec.h | 47 - code/jasmin/mlkem_avx2/reduce.c | 62 -- code/jasmin/mlkem_avx2/reduce.h | 15 - code/jasmin/mlkem_avx2/shuffle.S | 261 ------ code/jasmin/mlkem_avx2/speed.h | 62 -- code/jasmin/mlkem_avx2/symmetric-fips202.c | 77 -- code/jasmin/mlkem_avx2/symmetric.h | 52 -- code/jasmin/mlkem_ref/Makefile | 5 +- code/jasmin/mlkem_ref/cbd.c | 58 -- code/jasmin/mlkem_ref/cbd.h | 8 - code/jasmin/mlkem_ref/extraction/jkem.ec | 1 - code/jasmin/mlkem_ref/fips202.c | 549 ------------ code/jasmin/mlkem_ref/fips202.h | 28 - code/jasmin/mlkem_ref/indcpa.c | 321 ------- code/jasmin/mlkem_ref/indcpa.h | 36 - code/jasmin/mlkem_ref/kem.c | 145 --- code/jasmin/mlkem_ref/kem.h | 41 - code/jasmin/mlkem_ref/ntt.c | 152 ---- code/jasmin/mlkem_ref/ntt.h | 15 - code/jasmin/mlkem_ref/params.h | 42 - code/jasmin/mlkem_ref/poly.c | 359 -------- code/jasmin/mlkem_ref/poly.h | 66 -- code/jasmin/mlkem_ref/poly_ntt.c | 10 - code/jasmin/mlkem_ref/polyvec.c | 237 ----- code/jasmin/mlkem_ref/polyvec.h | 47 - code/jasmin/mlkem_ref/reduce.c | 62 -- code/jasmin/mlkem_ref/reduce.h | 15 - code/jasmin/mlkem_ref/reduce.jinc | 1 - code/jasmin/mlkem_ref/symmetric-fips202.c | 77 -- code/jasmin/mlkem_ref/symmetric.h | 52 -- code/kyber | 1 + 56 files changed, 12 insertions(+), 6880 deletions(-) delete mode 100644 code/jasmin/mlkem_avx2/cbd.c delete mode 100644 code/jasmin/mlkem_avx2/cbd.h delete mode 100644 code/jasmin/mlkem_avx2/compile.bench delete mode 100644 code/jasmin/mlkem_avx2/compile.bench.old delete mode 100644 code/jasmin/mlkem_avx2/consts.c delete mode 100644 code/jasmin/mlkem_avx2/consts.h delete mode 100644 code/jasmin/mlkem_avx2/fips202.c delete mode 100644 code/jasmin/mlkem_avx2/fips202.h delete mode 100644 code/jasmin/mlkem_avx2/fq.S delete mode 100644 code/jasmin/mlkem_avx2/fq.inc delete mode 100644 code/jasmin/mlkem_avx2/gen_matrix.jinc.try0 delete mode 100644 code/jasmin/mlkem_avx2/indcpa.c delete mode 100644 code/jasmin/mlkem_avx2/indcpa.h delete mode 100644 code/jasmin/mlkem_avx2/kem.c delete mode 100644 code/jasmin/mlkem_avx2/kem.h delete mode 100644 code/jasmin/mlkem_avx2/ntt.S delete mode 100644 code/jasmin/mlkem_avx2/ntt.c delete mode 100644 code/jasmin/mlkem_avx2/ntt.h delete mode 100644 code/jasmin/mlkem_avx2/params.h delete mode 100644 code/jasmin/mlkem_avx2/poly.c delete mode 100644 code/jasmin/mlkem_avx2/poly.h delete mode 100644 code/jasmin/mlkem_avx2/poly_ntt.c delete mode 100644 code/jasmin/mlkem_avx2/polyvec.c delete mode 100644 code/jasmin/mlkem_avx2/polyvec.h delete mode 100644 code/jasmin/mlkem_avx2/reduce.c delete mode 100644 code/jasmin/mlkem_avx2/reduce.h delete mode 100644 code/jasmin/mlkem_avx2/shuffle.S delete mode 100644 code/jasmin/mlkem_avx2/speed.h delete mode 100644 code/jasmin/mlkem_avx2/symmetric-fips202.c delete mode 100644 code/jasmin/mlkem_avx2/symmetric.h delete mode 100644 code/jasmin/mlkem_ref/cbd.c delete mode 100644 code/jasmin/mlkem_ref/cbd.h delete mode 100644 code/jasmin/mlkem_ref/fips202.c delete mode 100644 code/jasmin/mlkem_ref/fips202.h delete mode 100644 code/jasmin/mlkem_ref/indcpa.c delete mode 100644 code/jasmin/mlkem_ref/indcpa.h delete mode 100644 code/jasmin/mlkem_ref/kem.c delete mode 100644 code/jasmin/mlkem_ref/kem.h delete mode 100644 code/jasmin/mlkem_ref/ntt.c delete mode 100644 code/jasmin/mlkem_ref/ntt.h delete mode 100644 code/jasmin/mlkem_ref/params.h delete mode 100644 code/jasmin/mlkem_ref/poly.c delete mode 100644 code/jasmin/mlkem_ref/poly.h delete mode 100644 code/jasmin/mlkem_ref/poly_ntt.c delete mode 100644 code/jasmin/mlkem_ref/polyvec.c delete mode 100644 code/jasmin/mlkem_ref/polyvec.h delete mode 100644 code/jasmin/mlkem_ref/reduce.c delete mode 100644 code/jasmin/mlkem_ref/reduce.h delete mode 100644 code/jasmin/mlkem_ref/symmetric-fips202.c delete mode 100644 code/jasmin/mlkem_ref/symmetric.h create mode 160000 code/kyber diff --git a/.gitmodules b/.gitmodules index 94719e88..583d8a8d 100644 --- a/.gitmodules +++ b/.gitmodules @@ -10,3 +10,6 @@ [submodule "formosa-keccak"] path = formosa-keccak url = git@github.com:formosa-crypto/formosa-keccak.git +[submodule "code/kyber"] + path = code/kyber + url = https://github.com/pq-crystals/kyber.git diff --git a/code/jasmin/mlkem_avx2/Makefile b/code/jasmin/mlkem_avx2/Makefile index b9bfd118..28cf46e2 100644 --- a/code/jasmin/mlkem_avx2/Makefile +++ b/code/jasmin/mlkem_avx2/Makefile @@ -5,6 +5,7 @@ JADDFLAGS ?= -lazy-regalloc +CKP := ../../kyber/avx2 CC ?= /usr/bin/gcc GFLAGS ?= CFLAGS := -Wall -Wextra -g -Ofast -fomit-frame-pointer @@ -50,7 +51,7 @@ testX: test/test_poly_compress \ speed: test/speed_indcpa \ test/speed_mlkem -HEADERS = params.h poly.h fips202.h ntt.h indcpa.h kem.h \ +HEADERS = $(CKP)/params.h $(CKP)/poly.h $(CKP)/fips202.h $(CKP)/ntt.h $(CKP)/indcpa.h $(CKP)/kem.h \ JHEADERS = params.jinc \ reduce.jinc \ @@ -64,13 +65,13 @@ JHEADERS = params.jinc \ verify.jinc POLYHEADERS = poly.jinc \ - consts.jinc \ + consts.jinc \ POLYVECHEADERS = polyvec.jinc \ gen_matrix.jinc \ -INCS = fq.inc shuffle.inc -SOURCES = poly.c polyvec.c cbd.c fips202.c ntt.c reduce.c symmetric-fips202.c indcpa.c kem.c consts.c shuffle.S fq.S\ +INCS = $(CKP)/fq.inc $(CKP)/shuffle.inc +SOURCES = $(CKP)/poly.c $(CKP)/polyvec.c $(CKP)/cbd.c $(CKP)/fips202.c $(CKP)/fips202x4.c $(CKP)/ntt.S $(CKP)/verify.c $(CKP)/symmetric-shake.c $(CKP)/indcpa.c $(CKP)/kem.c $(CKP)/consts.c $(CKP)/rejsample.c $(CKP)/shuffle.S $(CKP)/fq.S\ test/test_indcpa: test/test_indcpa.c $(HEADERS) $(SOURCES) $(INCS) jindcpa.o $(CC) $(CFLAGS) -o $@ $(SOURCES) jindcpa.o $< diff --git a/code/jasmin/mlkem_avx2/cbd.c b/code/jasmin/mlkem_avx2/cbd.c deleted file mode 100644 index cfbcf0ac..00000000 --- a/code/jasmin/mlkem_avx2/cbd.c +++ /dev/null @@ -1,128 +0,0 @@ -#include <stdint.h> -#include "params.h" -#include "cbd.h" - -/************************************************* -* Name: load32_littleendian -* -* Description: load bytes into a 32-bit integer -* in little-endian order -* -* Arguments: - const unsigned char *x: pointer to input byte array -* -* Returns 32-bit unsigned integer loaded from x -**************************************************/ -static uint32_t load32_littleendian(const unsigned char *x) -{ - uint32_t r; - r = (uint32_t)x[0]; - r |= (uint32_t)x[1] << 8; - r |= (uint32_t)x[2] << 16; - r |= (uint32_t)x[3] << 24; - return r; -} - -/************************************************* -* Name: load24_littleendian -* -* Description: load 3 bytes into a 32-bit integer -* in little-endian order. -* This function is only needed for Kyber-512 -* -* Arguments: - const uint8_t *x: pointer to input byte array -* -* Returns 32-bit unsigned integer loaded from x (most significant byte is zero) -**************************************************/ -#if MLKEM_ETA1 == 3 -static uint32_t load24_littleendian(const uint8_t x[3]) -{ - uint32_t r; - r = (uint32_t)x[0]; - r |= (uint32_t)x[1] << 8; - r |= (uint32_t)x[2] << 16; - return r; -} -#endif - - -/************************************************* -* Name: cbd2 -* -* Description: Given an array of uniformly random bytes, compute -* polynomial with coefficients distributed according to -* a centered binomial distribution with parameter eta=2 -* -* Arguments: - poly *r: pointer to output polynomial -* - const uint8_t *buf: pointer to input byte array -**************************************************/ -static void cbd2(poly *r, const uint8_t buf[2*MLKEM_N/4]) -{ - unsigned int i,j; - uint32_t t,d; - int16_t a,b; - - for(i=0;i<MLKEM_N/8;i++) { - t = load32_littleendian(buf+4*i); - d = t & 0x55555555; - d += (t>>1) & 0x55555555; - - for(j=0;j<8;j++) { - a = (d >> (4*j+0)) & 0x3; - b = (d >> (4*j+2)) & 0x3; - r->coeffs[8*i+j] = a - b; - } - } -} - -/************************************************* -* Name: cbd3 -* -* Description: Given an array of uniformly random bytes, compute -* polynomial with coefficients distributed according to -* a centered binomial distribution with parameter eta=3. -* This function is only needed for Kyber-512 -* -* Arguments: - poly *r: pointer to output polynomial -* - const uint8_t *buf: pointer to input byte array -**************************************************/ -#if MLKEM_ETA1 == 3 -static void cbd3(poly *r, const uint8_t buf[3*MLKEM_N/4]) -{ - unsigned int i,j; - uint32_t t,d; - int16_t a,b; - - for(i=0;i<MLKEM_N/4;i++) { - t = load24_littleendian(buf+3*i); - d = t & 0x00249249; - d += (t>>1) & 0x00249249; - d += (t>>2) & 0x00249249; - - for(j=0;j<4;j++) { - a = (d >> (6*j+0)) & 0x7; - b = (d >> (6*j+3)) & 0x7; - r->coeffs[4*i+j] = a - b; - } - } -} -#endif - -void poly_cbd_eta1(poly *r, const uint8_t buf[MLKEM_ETA1*MLKEM_N/4]) -{ -#if MLKEM_ETA1 == 2 - cbd2(r, buf); -#elif MLKEM_ETA1 == 3 - cbd3(r, buf); -#else -#error "This implementation requires eta1 in {2,3}" -#endif -} - -void poly_cbd_eta2(poly *r, const uint8_t buf[MLKEM_ETA2*MLKEM_N/4]) -{ -#if MLKEM_ETA2 == 2 - cbd2(r, buf); -#else -#error "This implementation requires eta2 = 2" -#endif -} diff --git a/code/jasmin/mlkem_avx2/cbd.h b/code/jasmin/mlkem_avx2/cbd.h deleted file mode 100644 index 5dc3046b..00000000 --- a/code/jasmin/mlkem_avx2/cbd.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef CBD_H -#define CBD_H - -#include "poly.h" - -void poly_cbd_eta1(poly *r, const uint8_t buf[MLKEM_ETA1*MLKEM_N/4]); -void poly_cbd_eta2(poly *r, const uint8_t buf[MLKEM_ETA2*MLKEM_N/4]); - -#endif diff --git a/code/jasmin/mlkem_avx2/compile.bench b/code/jasmin/mlkem_avx2/compile.bench deleted file mode 100644 index 0981229c..00000000 --- a/code/jasmin/mlkem_avx2/compile.bench +++ /dev/null @@ -1,147 +0,0 @@ -===================================================== -===== Benchmark with flag -until_typing -===================================================== - -real 0m0.127s -user 0m0.095s -sys 0m0.025s -===================================================== -===== Benchmark with flag -until_cstexp -===================================================== - -real 0m0.114s -user 0m0.092s -sys 0m0.016s -===================================================== -===== Benchmark with flag -until_inline -===================================================== - -real 0m0.323s -user 0m0.297s -sys 0m0.019s -===================================================== -===== Benchmark with flag -until_rmfunc -===================================================== - -real 0m0.320s -user 0m0.294s -sys 0m0.020s -===================================================== -===== Benchmark with flag -until_unroll -===================================================== - -real 0m0.453s -user 0m0.424s -sys 0m0.022s -===================================================== -===== Benchmark with flag -until_splitting -===================================================== - -real 0m0.632s -user 0m0.597s -sys 0m0.027s -===================================================== -===== Benchmark with flag -until_valloc -===================================================== - -real 0m0.036s -user 0m0.016s -sys 0m0.015s -===================================================== -===== Benchmark with flag -until_vallocd -===================================================== - -real 0m0.034s -user 0m0.015s -sys 0m0.013s -===================================================== -===== Benchmark with flag -until_vshare -===================================================== - -real 0m0.036s -user 0m0.016s -sys 0m0.014s -===================================================== -===== Benchmark with flag -until_vshared -===================================================== - -real 0m0.035s -user 0m0.016s -sys 0m0.014s -===================================================== -===== Benchmark with flag -until_arrexp -===================================================== - -real 0m1.359s -user 0m1.317s -sys 0m0.033s -===================================================== -===== Benchmark with flag -until_rmarrinit -===================================================== - -real 0m1.125s -user 0m1.082s -sys 0m0.033s -===================================================== -===== Benchmark with flag -until_rmglobals -===================================================== - -real 0m2.040s -user 0m1.996s -sys 0m0.034s -===================================================== -===== Benchmark with flag -until_arrexp -===================================================== - -real 0m1.371s -user 0m1.327s -sys 0m0.034s -===================================================== -===== Benchmark with flag -until_makeref -===================================================== - -real 0m1.242s -user 0m1.199s -sys 0m0.034s -===================================================== -===== Benchmark with flag -until_lowering -===================================================== - -real 0m2.186s -user 0m2.140s -sys 0m0.041s -===================================================== -===== Benchmark with flag -until_stkalloc -===================================================== - -real 0m3.252s -user 0m3.202s -sys 0m0.041s -===================================================== -===== Benchmark with flag -until_ralloc -===================================================== - -real 0m3.866s -user 0m3.819s -sys 0m0.037s -===================================================== -===== Benchmark with flag -until_rallocd -===================================================== - -real 0m3.965s -user 0m3.920s -sys 0m0.037s -===================================================== -===== Benchmark with flag -until_linear -===================================================== - -real 0m3.983s -user 0m3.938s -sys 0m0.038s -===================================================== -===== Benchmark with flag -until_asm -===================================================== - -real 0m4.384s -user 0m4.233s -sys 0m0.140s diff --git a/code/jasmin/mlkem_avx2/compile.bench.old b/code/jasmin/mlkem_avx2/compile.bench.old deleted file mode 100644 index e46e66ee..00000000 --- a/code/jasmin/mlkem_avx2/compile.bench.old +++ /dev/null @@ -1,147 +0,0 @@ -===================================================== -===== Benchmark with flag -until_typing -===================================================== - -real 0m0.026s -user 0m0.023s -sys 0m0.004s -===================================================== -===== Benchmark with flag -until_cstexp -===================================================== - -real 0m0.027s -user 0m0.024s -sys 0m0.003s -===================================================== -===== Benchmark with flag -until_inline -===================================================== - -real 0m0.128s -user 0m0.119s -sys 0m0.009s -===================================================== -===== Benchmark with flag -until_rmfunc -===================================================== - -real 0m0.128s -user 0m0.124s -sys 0m0.004s -===================================================== -===== Benchmark with flag -until_unroll -===================================================== - -real 0m0.813s -user 0m0.789s -sys 0m0.024s -===================================================== -===== Benchmark with flag -until_splitting -===================================================== - -real 0m1.017s -user 0m1.012s -sys 0m0.004s -===================================================== -===== Benchmark with flag -until_valloc -===================================================== - -real 0m2.145s -user 0m2.116s -sys 0m0.029s -===================================================== -===== Benchmark with flag -until_vallocd -===================================================== - -real 0m3.375s -user 0m3.322s -sys 0m0.032s -===================================================== -===== Benchmark with flag -until_vshare -===================================================== - -real 0m6.072s -user 0m6.005s -sys 0m0.067s -===================================================== -===== Benchmark with flag -until_vshared -===================================================== - -real 0m9.594s -user 0m9.554s -sys 0m0.039s -===================================================== -===== Benchmark with flag -until_arrexp -===================================================== - -real 0m10.981s -user 0m10.943s -sys 0m0.036s -===================================================== -===== Benchmark with flag -until_rmarrinit -===================================================== - -real 0m9.608s -user 0m9.564s -sys 0m0.043s -===================================================== -===== Benchmark with flag -until_rmglobals -===================================================== - -real 0m11.234s -user 0m11.184s -sys 0m0.050s -===================================================== -===== Benchmark with flag -until_arrexp -===================================================== - -real 0m10.989s -user 0m10.908s -sys 0m0.052s -===================================================== -===== Benchmark with flag -until_makeref -===================================================== - -real 0m11.783s -user 0m11.750s -sys 0m0.032s -===================================================== -===== Benchmark with flag -until_lowering -===================================================== - -real 0m12.629s -user 0m12.561s -sys 0m0.068s -===================================================== -===== Benchmark with flag -until_stkalloc -===================================================== - -real 2m27.958s -user 2m27.867s -sys 0m0.088s -===================================================== -===== Benchmark with flag -until_ralloc -===================================================== - -real 4m43.603s -user 4m43.537s -sys 0m0.057s -===================================================== -===== Benchmark with flag -until_rallocd -===================================================== - -real 4m39.180s -user 4m39.085s -sys 0m0.095s -===================================================== -===== Benchmark with flag -until_linear -===================================================== - -real 4m43.906s -user 4m43.843s -sys 0m0.063s -===================================================== -===== Benchmark with flag -until_asm -===================================================== - -real 4m51.571s -user 4m51.416s -sys 0m0.156s diff --git a/code/jasmin/mlkem_avx2/consts.c b/code/jasmin/mlkem_avx2/consts.c deleted file mode 100644 index 7829b867..00000000 --- a/code/jasmin/mlkem_avx2/consts.c +++ /dev/null @@ -1,153 +0,0 @@ -#include <stdint.h> -#include "params.h" -#include "consts.h" - -#define Q MLKEM_Q -#define MONT ((1U << 16) % Q) -#define QINV 62209 // q^-1 mod 2^16 -#define V (((1U << 26) + Q/2)/Q) -#define FHI (MONT*(MONT*(Q-1)*((Q-1)/128) % Q) % Q) -#define FLO (FHI*QINV % 65536) -#define MONTSQHI (MONT*MONT % Q) -#define MONTSQLO (MONTSQHI*QINV % 65536) -#define MASK 4095 - -const uint16_t qdata[928] __attribute__((aligned(32))) = { -#define _16XQ 0 - Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, - -#define _16XQINV 16 - QINV, QINV, QINV, QINV, QINV, QINV, QINV, QINV, - QINV, QINV, QINV, QINV, QINV, QINV, QINV, QINV, - -#define _16XV 32 - V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, - -#define _16XFLO 48 - FLO, FLO, FLO, FLO, FLO, FLO, FLO, FLO, - FLO, FLO, FLO, FLO, FLO, FLO, FLO, FLO, - -#define _16XFHI 64 - FHI, FHI, FHI, FHI, FHI, FHI, FHI, FHI, - FHI, FHI, FHI, FHI, FHI, FHI, FHI, FHI, - -#define _16XMONTSQLO 80 - MONTSQLO, MONTSQLO, MONTSQLO, MONTSQLO, - MONTSQLO, MONTSQLO, MONTSQLO, MONTSQLO, - MONTSQLO, MONTSQLO, MONTSQLO, MONTSQLO, - MONTSQLO, MONTSQLO, MONTSQLO, MONTSQLO, - -#define _16XMONTSQHI 96 - MONTSQHI, MONTSQHI, MONTSQHI, MONTSQHI, - MONTSQHI, MONTSQHI, MONTSQHI, MONTSQHI, - MONTSQHI, MONTSQHI, MONTSQHI, MONTSQHI, - MONTSQHI, MONTSQHI, MONTSQHI, MONTSQHI, - -#define _16XMASK 112 - MASK, MASK, MASK, MASK, MASK, MASK, MASK, MASK, - MASK, MASK, MASK, MASK, MASK, MASK, MASK, MASK, - -#define _ZETAS_EXP 128 - 31499, 31499, 2571, 2571, 14746, 14746, 2970, 2970, - 13525, 13525, 13525, 13525, 13525, 13525, 13525, 13525, - 53134, 53134, 53134, 53134, 53134, 53134, 53134, 53134, - 1493, 1493, 1493, 1493, 1493, 1493, 1493, 1493, - 1422, 1422, 1422, 1422, 1422, 1422, 1422, 1422, - 44630, 44630, 44630, 44630, 27758, 27758, 27758, 27758, - 61737, 61737, 61737, 61737, 49846, 49846, 49846, 49846, - 3158, 3158, 3158, 3158, 622, 622, 622, 622, - 1577, 1577, 1577, 1577, 182, 182, 182, 182, - 59709, 59709, 17364, 17364, 39176, 39176, 36479, 36479, - 5572, 5572, 64434, 64434, 21439, 21439, 39295, 39295, - 573, 573, 2004, 2004, 264, 264, 383, 383, - 2500, 2500, 1458, 1458, 1727, 1727, 3199, 3199, - 59847, 59020, 1497, 30967, 41972, 20179, 20711, 25081, - 52740, 26617, 16065, 53095, 9135, 64887, 39550, 27837, - 1223, 652, 2777, 1015, 2036, 1491, 3047, 1785, - 516, 3321, 3009, 2663, 1711, 2167, 126, 1469, - 65202, 54059, 33310, 20494, 37798, 945, 50654, 6182, - 32011, 10631, 29176, 36775, 47051, 17561, 51106, 60261, - 2226, 555, 2078, 1550, 422, 177, 3038, 1574, - 3083, 1159, 2552, 2727, 1739, 2457, 418, 3173, - 11182, 13387, 51303, 43881, 13131, 60950, 23093, 5493, - 33034, 30318, 46795, 12639, 20100, 18525, 19529, 52918, - 430, 843, 871, 105, 587, 3094, 2869, 1653, - 778, 3182, 1483, 1119, 644, 349, 329, 3254, - 788, 788, 1812, 1812, 28191, 28191, 28191, 28191, - 28191, 28191, 28191, 28191, 48842, 48842, 48842, 48842, - 48842, 48842, 48842, 48842, 287, 287, 287, 287, - 287, 287, 287, 287, 202, 202, 202, 202, - 202, 202, 202, 202, 10690, 10690, 10690, 10690, - 1359, 1359, 1359, 1359, 54335, 54335, 54335, 54335, - 31164, 31164, 31164, 31164, 962, 962, 962, 962, - 2127, 2127, 2127, 2127, 1855, 1855, 1855, 1855, - 1468, 1468, 1468, 1468, 37464, 37464, 24313, 24313, - 55004, 55004, 8800, 8800, 18427, 18427, 8859, 8859, - 26676, 26676, 49374, 49374, 2648, 2648, 1017, 1017, - 732, 732, 608, 608, 1787, 1787, 411, 411, - 3124, 3124, 1758, 1758, 19884, 37287, 49650, 56638, - 37227, 9076, 35338, 18250, 13427, 14017, 36381, 52780, - 16832, 4312, 41381, 47622, 2476, 3239, 3058, 830, - 107, 1908, 3082, 2378, 2931, 961, 1821, 2604, - 448, 2264, 677, 2054, 34353, 25435, 58154, 24392, - 44610, 10946, 24215, 16990, 10336, 57603, 43035, 10907, - 31637, 28644, 23998, 48114, 817, 603, 1322, 1864, - 2114, 1218, 2455, 2142, 2144, 2051, 1819, 2459, - 3221, 996, 958, 1522, 20297, 2146, 15356, 33152, - 59257, 50634, 54492, 14470, 44039, 45338, 23211, 48094, - 41677, 45279, 7757, 23132, 1097, 610, 2044, 384, - 3193, 1994, 220, 1670, 1799, 794, 2475, 478, - 3021, 991, 1869, 1628, 0, 0, 0, 0, - -#define _ZETAS_INV_EXP 528 - 42405, 57780, 20258, 23860, 17443, 42326, 20199, 21498, - 51067, 11045, 14903, 6280, 32385, 50181, 63391, 45240, - 1701, 1460, 2338, 308, 2851, 854, 2535, 1530, - 1659, 3109, 1335, 136, 2945, 1285, 2719, 2232, - 17423, 41539, 36893, 33900, 54630, 22502, 7934, 55201, - 48547, 41322, 54591, 20927, 41145, 7383, 40102, 31184, - 1807, 2371, 2333, 108, 870, 1510, 1278, 1185, - 1187, 874, 2111, 1215, 1465, 2007, 2726, 2512, - 17915, 24156, 61225, 48705, 12757, 29156, 51520, 52110, - 47287, 30199, 56461, 28310, 8899, 15887, 28250, 45653, - 1275, 2652, 1065, 2881, 725, 1508, 2368, 398, - 951, 247, 1421, 3222, 2499, 271, 90, 853, - 16163, 16163, 38861, 38861, 56678, 56678, 47110, 47110, - 56737, 56737, 10533, 10533, 41224, 41224, 28073, 28073, - 1571, 1571, 205, 205, 2918, 2918, 1542, 1542, - 2721, 2721, 2597, 2597, 2312, 2312, 681, 681, - 34373, 34373, 34373, 34373, 11202, 11202, 11202, 11202, - 64178, 64178, 64178, 64178, 54847, 54847, 54847, 54847, - 1861, 1861, 1861, 1861, 1474, 1474, 1474, 1474, - 1202, 1202, 1202, 1202, 2367, 2367, 2367, 2367, - 16695, 16695, 16695, 16695, 16695, 16695, 16695, 16695, - 37346, 37346, 37346, 37346, 37346, 37346, 37346, 37346, - 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, - 3042, 3042, 3042, 3042, 3042, 3042, 3042, 3042, - 64749, 64749, 1517, 1517, 12619, 46008, 47012, 45437, - 52898, 18742, 35219, 32503, 60044, 42444, 4587, 52406, - 21656, 14234, 52150, 54355, 75, 3000, 2980, 2685, - 2210, 1846, 147, 2551, 1676, 460, 235, 2742, - 3224, 2458, 2486, 2899, 5276, 14431, 47976, 18486, - 28762, 36361, 54906, 33526, 59355, 14883, 64592, 27739, - 45043, 32227, 11478, 335, 156, 2911, 872, 1590, - 602, 777, 2170, 246, 1755, 291, 3152, 2907, - 1779, 1251, 2774, 1103, 37700, 25987, 650, 56402, - 12442, 49472, 38920, 12797, 40456, 44826, 45358, 23565, - 34570, 64040, 6517, 5690, 1860, 3203, 1162, 1618, - 666, 320, 8, 2813, 1544, 282, 1838, 1293, - 2314, 552, 2677, 2106, 26242, 26242, 44098, 44098, - 1103, 1103, 59965, 59965, 29058, 29058, 26361, 26361, - 48173, 48173, 5828, 5828, 130, 130, 1602, 1602, - 1871, 1871, 829, 829, 2946, 2946, 3065, 3065, - 1325, 1325, 2756, 2756, 15691, 15691, 15691, 15691, - 3800, 3800, 3800, 3800, 37779, 37779, 37779, 37779, - 20907, 20907, 20907, 20907, 3147, 3147, 3147, 3147, - 1752, 1752, 1752, 1752, 2707, 2707, 2707, 2707, - 171, 171, 171, 171, 12403, 12403, 12403, 12403, - 12403, 12403, 12403, 12403, 52012, 52012, 52012, 52012, - 52012, 52012, 52012, 52012, 1907, 1907, 1907, 1907, - 1907, 1907, 1907, 1907, 1836, 1836, 1836, 1836, - 1836, 1836, 1836, 1836, 50791, 50791, 359, 359, - 60300, 60300, 1932, 1932, 0, 0, 0, 0 -}; diff --git a/code/jasmin/mlkem_avx2/consts.h b/code/jasmin/mlkem_avx2/consts.h deleted file mode 100644 index 35ebc43e..00000000 --- a/code/jasmin/mlkem_avx2/consts.h +++ /dev/null @@ -1,40 +0,0 @@ -#ifndef CONSTS_H -#define CONSTS_H - -#include "params.h" - -#define _16XQ 0 -#define _16XQINV 16 -#define _16XV 32 -#define _16XFLO 48 -#define _16XFHI 64 -#define _16XMONTSQLO 80 -#define _16XMONTSQHI 96 -#define _16XMASK 112 -#define _ZETAS_EXP 128 -#define _ZETAS_INV_EXP 528 - -/* The C ABI on MacOS exports all symbols with a leading - * underscore. This means that any symbols we refer to from - * C files (functions) can't be found, and all symbols we - * refer to from ASM also can't be found. - * - * This define helps us get around this - */ -#ifdef __ASSEMBLER__ -#if defined(__WIN32__) || defined(__APPLE__) -#define decorate(s) _##s -#define cdecl2(s) decorate(s) -#define cdecl(s) cdecl2(MLKEM_NAMESPACE(##s)) -#else -#define cdecl(s) MLKEM_NAMESPACE(##s) -#endif -#endif - -#ifndef __ASSEMBLER__ -#include <stdint.h> -#define qdata MLKEM_NAMESPACE(qdata) -extern const uint16_t qdata[]; -#endif - -#endif diff --git a/code/jasmin/mlkem_avx2/fips202.c b/code/jasmin/mlkem_avx2/fips202.c deleted file mode 100644 index d300328b..00000000 --- a/code/jasmin/mlkem_avx2/fips202.c +++ /dev/null @@ -1,549 +0,0 @@ -/* Based on the public domain implementation in - * crypto_hash/keccakc512/simple/ from http://bench.cr.yp.to/supercop.html - * by Ronny Van Keer - * and the public domain "TweetFips202" implementation - * from https://twitter.com/tweetfips202 - * by Gilles Van Assche, Daniel J. Bernstein, and Peter Schwabe */ - -#include <stdint.h> -#include <assert.h> -#include "fips202.h" - -#define NROUNDS 24 -#define ROL(a, offset) ((a << offset) ^ (a >> (64-offset))) - -/************************************************* -* Name: load64 -* -* Description: Load 8 bytes into uint64_t in little-endian order -* -* Arguments: - const unsigned char *x: pointer to input byte array -* -* Returns the loaded 64-bit unsigned integer -**************************************************/ -static uint64_t load64(const unsigned char *x) -{ - unsigned long long r = 0, i; - - for (i = 0; i < 8; ++i) { - r |= (unsigned long long)x[i] << 8 * i; - } - return r; -} - -/************************************************* -* Name: store64 -* -* Description: Store a 64-bit integer to a byte array in little-endian order -* -* Arguments: - uint8_t *x: pointer to the output byte array -* - uint64_t u: input 64-bit unsigned integer -**************************************************/ -static void store64(uint8_t *x, uint64_t u) -{ - unsigned int i; - - for(i=0; i<8; ++i) { - x[i] = u; - u >>= 8; - } -} - -/* Keccak round constants */ -static const uint64_t KeccakF_RoundConstants[NROUNDS] = -{ - (uint64_t)0x0000000000000001ULL, - (uint64_t)0x0000000000008082ULL, - (uint64_t)0x800000000000808aULL, - (uint64_t)0x8000000080008000ULL, - (uint64_t)0x000000000000808bULL, - (uint64_t)0x0000000080000001ULL, - (uint64_t)0x8000000080008081ULL, - (uint64_t)0x8000000000008009ULL, - (uint64_t)0x000000000000008aULL, - (uint64_t)0x0000000000000088ULL, - (uint64_t)0x0000000080008009ULL, - (uint64_t)0x000000008000000aULL, - (uint64_t)0x000000008000808bULL, - (uint64_t)0x800000000000008bULL, - (uint64_t)0x8000000000008089ULL, - (uint64_t)0x8000000000008003ULL, - (uint64_t)0x8000000000008002ULL, - (uint64_t)0x8000000000000080ULL, - (uint64_t)0x000000000000800aULL, - (uint64_t)0x800000008000000aULL, - (uint64_t)0x8000000080008081ULL, - (uint64_t)0x8000000000008080ULL, - (uint64_t)0x0000000080000001ULL, - (uint64_t)0x8000000080008008ULL -}; - -/************************************************* -* Name: KeccakF1600_StatePermute -* -* Description: The Keccak F1600 Permutation -* -* Arguments: - uint64_t * state: pointer to in/output Keccak state -**************************************************/ -static void KeccakF1600_StatePermute(uint64_t * state) -{ - int round; - - uint64_t Aba, Abe, Abi, Abo, Abu; - uint64_t Aga, Age, Agi, Ago, Agu; - uint64_t Aka, Ake, Aki, Ako, Aku; - uint64_t Ama, Ame, Ami, Amo, Amu; - uint64_t Asa, Ase, Asi, Aso, Asu; - uint64_t BCa, BCe, BCi, BCo, BCu; - uint64_t Da, De, Di, Do, Du; - uint64_t Eba, Ebe, Ebi, Ebo, Ebu; - uint64_t Ega, Ege, Egi, Ego, Egu; - uint64_t Eka, Eke, Eki, Eko, Eku; - uint64_t Ema, Eme, Emi, Emo, Emu; - uint64_t Esa, Ese, Esi, Eso, Esu; - - //copyFromState(A, state) - Aba = state[ 0]; - Abe = state[ 1]; - Abi = state[ 2]; - Abo = state[ 3]; - Abu = state[ 4]; - Aga = state[ 5]; - Age = state[ 6]; - Agi = state[ 7]; - Ago = state[ 8]; - Agu = state[ 9]; - Aka = state[10]; - Ake = state[11]; - Aki = state[12]; - Ako = state[13]; - Aku = state[14]; - Ama = state[15]; - Ame = state[16]; - Ami = state[17]; - Amo = state[18]; - Amu = state[19]; - Asa = state[20]; - Ase = state[21]; - Asi = state[22]; - Aso = state[23]; - Asu = state[24]; - - for( round = 0; round < NROUNDS; round += 2 ) - { - // prepareTheta - BCa = Aba^Aga^Aka^Ama^Asa; - BCe = Abe^Age^Ake^Ame^Ase; - BCi = Abi^Agi^Aki^Ami^Asi; - BCo = Abo^Ago^Ako^Amo^Aso; - BCu = Abu^Agu^Aku^Amu^Asu; - - //thetaRhoPiChiIotaPrepareTheta(round , A, E) - Da = BCu^ROL(BCe, 1); - De = BCa^ROL(BCi, 1); - Di = BCe^ROL(BCo, 1); - Do = BCi^ROL(BCu, 1); - Du = BCo^ROL(BCa, 1); - - Aba ^= Da; - BCa = Aba; - Age ^= De; - BCe = ROL(Age, 44); - Aki ^= Di; - BCi = ROL(Aki, 43); - Amo ^= Do; - BCo = ROL(Amo, 21); - Asu ^= Du; - BCu = ROL(Asu, 14); - Eba = BCa ^((~BCe)& BCi ); - Eba ^= (uint64_t)KeccakF_RoundConstants[round]; - Ebe = BCe ^((~BCi)& BCo ); - Ebi = BCi ^((~BCo)& BCu ); - Ebo = BCo ^((~BCu)& BCa ); - Ebu = BCu ^((~BCa)& BCe ); - - Abo ^= Do; - BCa = ROL(Abo, 28); - Agu ^= Du; - BCe = ROL(Agu, 20); - Aka ^= Da; - BCi = ROL(Aka, 3); - Ame ^= De; - BCo = ROL(Ame, 45); - Asi ^= Di; - BCu = ROL(Asi, 61); - Ega = BCa ^((~BCe)& BCi ); - Ege = BCe ^((~BCi)& BCo ); - Egi = BCi ^((~BCo)& BCu ); - Ego = BCo ^((~BCu)& BCa ); - Egu = BCu ^((~BCa)& BCe ); - - Abe ^= De; - BCa = ROL(Abe, 1); - Agi ^= Di; - BCe = ROL(Agi, 6); - Ako ^= Do; - BCi = ROL(Ako, 25); - Amu ^= Du; - BCo = ROL(Amu, 8); - Asa ^= Da; - BCu = ROL(Asa, 18); - Eka = BCa ^((~BCe)& BCi ); - Eke = BCe ^((~BCi)& BCo ); - Eki = BCi ^((~BCo)& BCu ); - Eko = BCo ^((~BCu)& BCa ); - Eku = BCu ^((~BCa)& BCe ); - - Abu ^= Du; - BCa = ROL(Abu, 27); - Aga ^= Da; - BCe = ROL(Aga, 36); - Ake ^= De; - BCi = ROL(Ake, 10); - Ami ^= Di; - BCo = ROL(Ami, 15); - Aso ^= Do; - BCu = ROL(Aso, 56); - Ema = BCa ^((~BCe)& BCi ); - Eme = BCe ^((~BCi)& BCo ); - Emi = BCi ^((~BCo)& BCu ); - Emo = BCo ^((~BCu)& BCa ); - Emu = BCu ^((~BCa)& BCe ); - - Abi ^= Di; - BCa = ROL(Abi, 62); - Ago ^= Do; - BCe = ROL(Ago, 55); - Aku ^= Du; - BCi = ROL(Aku, 39); - Ama ^= Da; - BCo = ROL(Ama, 41); - Ase ^= De; - BCu = ROL(Ase, 2); - Esa = BCa ^((~BCe)& BCi ); - Ese = BCe ^((~BCi)& BCo ); - Esi = BCi ^((~BCo)& BCu ); - Eso = BCo ^((~BCu)& BCa ); - Esu = BCu ^((~BCa)& BCe ); - - // prepareTheta - BCa = Eba^Ega^Eka^Ema^Esa; - BCe = Ebe^Ege^Eke^Eme^Ese; - BCi = Ebi^Egi^Eki^Emi^Esi; - BCo = Ebo^Ego^Eko^Emo^Eso; - BCu = Ebu^Egu^Eku^Emu^Esu; - - //thetaRhoPiChiIotaPrepareTheta(round+1, E, A) - Da = BCu^ROL(BCe, 1); - De = BCa^ROL(BCi, 1); - Di = BCe^ROL(BCo, 1); - Do = BCi^ROL(BCu, 1); - Du = BCo^ROL(BCa, 1); - - Eba ^= Da; - BCa = Eba; - Ege ^= De; - BCe = ROL(Ege, 44); - Eki ^= Di; - BCi = ROL(Eki, 43); - Emo ^= Do; - BCo = ROL(Emo, 21); - Esu ^= Du; - BCu = ROL(Esu, 14); - Aba = BCa ^((~BCe)& BCi ); - Aba ^= (uint64_t)KeccakF_RoundConstants[round+1]; - Abe = BCe ^((~BCi)& BCo ); - Abi = BCi ^((~BCo)& BCu ); - Abo = BCo ^((~BCu)& BCa ); - Abu = BCu ^((~BCa)& BCe ); - - Ebo ^= Do; - BCa = ROL(Ebo, 28); - Egu ^= Du; - BCe = ROL(Egu, 20); - Eka ^= Da; - BCi = ROL(Eka, 3); - Eme ^= De; - BCo = ROL(Eme, 45); - Esi ^= Di; - BCu = ROL(Esi, 61); - Aga = BCa ^((~BCe)& BCi ); - Age = BCe ^((~BCi)& BCo ); - Agi = BCi ^((~BCo)& BCu ); - Ago = BCo ^((~BCu)& BCa ); - Agu = BCu ^((~BCa)& BCe ); - - Ebe ^= De; - BCa = ROL(Ebe, 1); - Egi ^= Di; - BCe = ROL(Egi, 6); - Eko ^= Do; - BCi = ROL(Eko, 25); - Emu ^= Du; - BCo = ROL(Emu, 8); - Esa ^= Da; - BCu = ROL(Esa, 18); - Aka = BCa ^((~BCe)& BCi ); - Ake = BCe ^((~BCi)& BCo ); - Aki = BCi ^((~BCo)& BCu ); - Ako = BCo ^((~BCu)& BCa ); - Aku = BCu ^((~BCa)& BCe ); - - Ebu ^= Du; - BCa = ROL(Ebu, 27); - Ega ^= Da; - BCe = ROL(Ega, 36); - Eke ^= De; - BCi = ROL(Eke, 10); - Emi ^= Di; - BCo = ROL(Emi, 15); - Eso ^= Do; - BCu = ROL(Eso, 56); - Ama = BCa ^((~BCe)& BCi ); - Ame = BCe ^((~BCi)& BCo ); - Ami = BCi ^((~BCo)& BCu ); - Amo = BCo ^((~BCu)& BCa ); - Amu = BCu ^((~BCa)& BCe ); - - Ebi ^= Di; - BCa = ROL(Ebi, 62); - Ego ^= Do; - BCe = ROL(Ego, 55); - Eku ^= Du; - BCi = ROL(Eku, 39); - Ema ^= Da; - BCo = ROL(Ema, 41); - Ese ^= De; - BCu = ROL(Ese, 2); - Asa = BCa ^((~BCe)& BCi ); - Ase = BCe ^((~BCi)& BCo ); - Asi = BCi ^((~BCo)& BCu ); - Aso = BCo ^((~BCu)& BCa ); - Asu = BCu ^((~BCa)& BCe ); - } - - //copyToState(state, A) - state[ 0] = Aba; - state[ 1] = Abe; - state[ 2] = Abi; - state[ 3] = Abo; - state[ 4] = Abu; - state[ 5] = Aga; - state[ 6] = Age; - state[ 7] = Agi; - state[ 8] = Ago; - state[ 9] = Agu; - state[10] = Aka; - state[11] = Ake; - state[12] = Aki; - state[13] = Ako; - state[14] = Aku; - state[15] = Ama; - state[16] = Ame; - state[17] = Ami; - state[18] = Amo; - state[19] = Amu; - state[20] = Asa; - state[21] = Ase; - state[22] = Asi; - state[23] = Aso; - state[24] = Asu; - - #undef round -} - -#include <string.h> -#define MIN(a, b) ((a) < (b) ? (a) : (b)) - - -/************************************************* -* Name: keccak_absorb -* -* Description: Absorb step of Keccak; -* non-incremental, starts by zeroeing the state. -* -* Arguments: - uint64_t *s: pointer to (uninitialized) output Keccak state -* - unsigned int r: rate in bytes (e.g., 168 for SHAKE128) -* - const unsigned char *m: pointer to input to be absorbed into s -* - unsigned long long mlen: length of input in bytes -* - unsigned char p: domain-separation byte for different Keccak-derived functions -**************************************************/ -static void keccak_absorb(uint64_t *s, - unsigned int r, - const unsigned char *m, unsigned long long int mlen, - unsigned char p) -{ - unsigned long long i; - unsigned char t[200]; - - // Zero state - for (i = 0; i < 25; ++i) - s[i] = 0; - - while (mlen >= r) - { - for (i = 0; i < r / 8; ++i) - s[i] ^= load64(m + 8 * i); - - KeccakF1600_StatePermute(s); - mlen -= r; - m += r; - } - - for (i = 0; i < r; ++i) - t[i] = 0; - for (i = 0; i < mlen; ++i) - t[i] = m[i]; - t[i] = p; - t[r - 1] |= 128; - for (i = 0; i < r / 8; ++i) - s[i] ^= load64(t + 8 * i); -} - - -/************************************************* -* Name: keccak_squeezeblocks -* -* Description: Squeeze step of Keccak. Squeezes full blocks of r bytes each. -* Modifies the state. Can be called multiple times to keep squeezing, -* i.e., is incremental. -* -* Arguments: - unsigned char *h: pointer to output blocks -* - unsigned long long int nblocks: number of blocks to be squeezed (written to h) -* - uint64_t *s: pointer to in/output Keccak state -* - unsigned int r: rate in bytes (e.g., 168 for SHAKE128) -**************************************************/ -static void keccak_squeezeblocks(unsigned char *h, unsigned long long int nblocks, - uint64_t *s, - unsigned int r) -{ - unsigned int i; - while(nblocks > 0) - { - KeccakF1600_StatePermute(s); - for(i=0;i<(r>>3);i++) - { - store64(h+8*i, s[i]); - } - h += r; - nblocks--; - } -} - - -/************************************************* -* Name: shake128_absorb -* -* Description: Absorb step of the SHAKE128 XOF. -* non-incremental, starts by zeroeing the state. -* -* Arguments: - uint64_t *s: pointer to (uninitialized) output Keccak state -* - const unsigned char *input: pointer to input to be absorbed into s -* - unsigned long long inputByteLen: length of input in bytes -**************************************************/ -void shake128_absorb(uint64_t *s, const unsigned char *input, unsigned int inputByteLen) -{ - keccak_absorb(s, SHAKE128_RATE, input, inputByteLen, 0x1F); -} - -/************************************************* -* Name: shake128_squeezeblocks -* -* Description: Squeeze step of SHAKE128 XOF. Squeezes full blocks of SHAKE128_RATE bytes each. -* Modifies the state. Can be called multiple times to keep squeezing, -* i.e., is incremental. -* -* Arguments: - unsigned char *output: pointer to output blocks -* - unsigned long long nblocks: number of blocks to be squeezed (written to output) -* - uint64_t *s: pointer to in/output Keccak state -**************************************************/ -void shake128_squeezeblocks(unsigned char *output, unsigned long long nblocks, uint64_t *s) -{ - keccak_squeezeblocks(output, nblocks, s, SHAKE128_RATE); -} - -/************************************************* -* Name: shake256 -* -* Description: SHAKE256 XOF with non-incremental API -* -* Arguments: - unsigned char *output: pointer to output -* - unsigned long long outlen: requested output length in bytes - - const unsigned char *input: pointer to input - - unsigned long long inlen: length of input in bytes -**************************************************/ -void shake256(unsigned char *output, unsigned long long outlen, - const unsigned char *input, unsigned long long inlen) -{ - uint64_t s[25]; - unsigned char t[SHAKE256_RATE]; - unsigned long long nblocks = outlen/SHAKE256_RATE; - size_t i; - - /* Absorb input */ - keccak_absorb(s, SHAKE256_RATE, input, inlen, 0x1F); - - /* Squeeze output */ - keccak_squeezeblocks(output, nblocks, s, SHAKE256_RATE); - - output+=nblocks*SHAKE256_RATE; - outlen-=nblocks*SHAKE256_RATE; - - if(outlen) - { - keccak_squeezeblocks(t, 1, s, SHAKE256_RATE); - for(i=0;i<outlen;i++) - output[i] = t[i]; - } -} - -/************************************************* -* Name: sha3_256 -* -* Description: SHA3-256 with non-incremental API -* -* Arguments: - unsigned char *output: pointer to output (32 bytes) -* - const unsigned char *input: pointer to input -* - unsigned long long inlen: length of input in bytes -**************************************************/ -void sha3_256(unsigned char *output, const unsigned char *input, unsigned long long inlen) -{ - uint64_t s[25]; - unsigned char t[SHA3_256_RATE]; - size_t i; - - /* Absorb input */ - keccak_absorb(s, SHA3_256_RATE, input, inlen, 0x06); - - /* Squeeze output */ - keccak_squeezeblocks(t, 1, s, SHA3_256_RATE); - - for(i=0;i<32;i++) - output[i] = t[i]; -} - -/************************************************* -* Name: sha3_512 -* -* Description: SHA3-512 with non-incremental API -* -* Arguments: - unsigned char *output: pointer to output (64 bytes) -* - const unsigned char *input: pointer to input -* - unsigned long long inlen: length of input in bytes -**************************************************/ -void sha3_512(unsigned char *output, const unsigned char *input, unsigned long long inlen) -{ - uint64_t s[25]; - unsigned char t[SHA3_512_RATE]; - size_t i; - - /* Absorb input */ - keccak_absorb(s, SHA3_512_RATE, input, inlen, 0x06); - - /* Squeeze output */ - keccak_squeezeblocks(t, 1, s, SHA3_512_RATE); - - for(i=0;i<64;i++) - output[i] = t[i]; -} - diff --git a/code/jasmin/mlkem_avx2/fips202.h b/code/jasmin/mlkem_avx2/fips202.h deleted file mode 100644 index f1cb58bf..00000000 --- a/code/jasmin/mlkem_avx2/fips202.h +++ /dev/null @@ -1,28 +0,0 @@ -#ifndef FIPS202_H -#define FIPS202_H - -#include <stdint.h> - -#define SHAKE128_RATE 168 -#define SHAKE256_RATE 136 -#define SHA3_256_RATE 136 -#define SHA3_512_RATE 72 - -void shake128_absorb(uint64_t *s, const unsigned char *input, unsigned int inputByteLen); -void shake128_squeezeblocks(unsigned char *output, unsigned long long nblocks, uint64_t *s); - -void shake256(unsigned char *output, unsigned long long outlen, const unsigned char *input, unsigned long long inlen); - -void sha3_256(unsigned char *output, const unsigned char *input, unsigned long long inlen); -void sha3_512(unsigned char *output, const unsigned char *input, unsigned long long inlen); - - - -void shake256_128_33_jazz(unsigned char *output, const unsigned char *input); -void sha3_512_32_jazz(unsigned char *output, const unsigned char *input); - -void shake128_absorb34_jazz(uint64_t *s, const unsigned char *input); -void shake128_squeezeblock_jazz(unsigned char *output, uint64_t *s); - - -#endif diff --git a/code/jasmin/mlkem_avx2/fq.S b/code/jasmin/mlkem_avx2/fq.S deleted file mode 100644 index d4c5c902..00000000 --- a/code/jasmin/mlkem_avx2/fq.S +++ /dev/null @@ -1,129 +0,0 @@ -#include "consts.h" -.include "fq.inc" - -.text -reduce128_avx: -#load -vmovdqa (%rdi),%ymm2 -vmovdqa 32(%rdi),%ymm3 -vmovdqa 64(%rdi),%ymm4 -vmovdqa 96(%rdi),%ymm5 -vmovdqa 128(%rdi),%ymm6 -vmovdqa 160(%rdi),%ymm7 -vmovdqa 192(%rdi),%ymm8 -vmovdqa 224(%rdi),%ymm9 - -red16 2,10 -red16 3,11 -red16 4,12 -red16 5,13 -red16 6,14 -red16 7,15 -red16 8,10 -red16 9,11 - -#store -vmovdqa %ymm2,(%rdi) -vmovdqa %ymm3,32(%rdi) -vmovdqa %ymm4,64(%rdi) -vmovdqa %ymm5,96(%rdi) -vmovdqa %ymm6,128(%rdi) -vmovdqa %ymm7,160(%rdi) -vmovdqa %ymm8,192(%rdi) -vmovdqa %ymm9,224(%rdi) - -ret - -.global cdecl(reduce_avx) -cdecl(reduce_avx): -#consts -vmovdqa _16XQ*2(%rsi),%ymm0 -vmovdqa _16XV*2(%rsi),%ymm1 -call reduce128_avx -add $256,%rdi -call reduce128_avx -ret - -csubq128_avx: -#load -vmovdqa (%rdi),%ymm1 -vmovdqa 32(%rdi),%ymm2 -vmovdqa 64(%rdi),%ymm3 -vmovdqa 96(%rdi),%ymm4 -vmovdqa 128(%rdi),%ymm5 -vmovdqa 160(%rdi),%ymm6 -vmovdqa 192(%rdi),%ymm7 -vmovdqa 224(%rdi),%ymm8 - -csubq 1,9 -csubq 2,10 -csubq 3,11 -csubq 4,12 -csubq 5,13 -csubq 6,14 -csubq 7,15 -csubq 8,9 - -#store -vmovdqa %ymm1,(%rdi) -vmovdqa %ymm2,32(%rdi) -vmovdqa %ymm3,64(%rdi) -vmovdqa %ymm4,96(%rdi) -vmovdqa %ymm5,128(%rdi) -vmovdqa %ymm6,160(%rdi) -vmovdqa %ymm7,192(%rdi) -vmovdqa %ymm8,224(%rdi) - -ret - -.global cdecl(csubq_avx) -cdecl(csubq_avx): -#consts -vmovdqa _16XQ*2(%rsi),%ymm0 -call csubq128_avx -add $256,%rdi -call csubq128_avx -ret - -tomont128_avx: -#load -vmovdqa (%rdi),%ymm3 -vmovdqa 32(%rdi),%ymm4 -vmovdqa 64(%rdi),%ymm5 -vmovdqa 96(%rdi),%ymm6 -vmovdqa 128(%rdi),%ymm7 -vmovdqa 160(%rdi),%ymm8 -vmovdqa 192(%rdi),%ymm9 -vmovdqa 224(%rdi),%ymm10 - -fqmulprecomp 1,2,3,11 -fqmulprecomp 1,2,4,12 -fqmulprecomp 1,2,5,13 -fqmulprecomp 1,2,6,14 -fqmulprecomp 1,2,7,15 -fqmulprecomp 1,2,8,11 -fqmulprecomp 1,2,9,12 -fqmulprecomp 1,2,10,13 - -#store -vmovdqa %ymm3,(%rdi) -vmovdqa %ymm4,32(%rdi) -vmovdqa %ymm5,64(%rdi) -vmovdqa %ymm6,96(%rdi) -vmovdqa %ymm7,128(%rdi) -vmovdqa %ymm8,160(%rdi) -vmovdqa %ymm9,192(%rdi) -vmovdqa %ymm10,224(%rdi) - -ret - -.global cdecl(tomont_avx) -cdecl(tomont_avx): -#consts -vmovdqa _16XQ*2(%rsi),%ymm0 -vmovdqa _16XMONTSQLO*2(%rsi),%ymm1 -vmovdqa _16XMONTSQHI*2(%rsi),%ymm2 -call tomont128_avx -add $256,%rdi -call tomont128_avx -ret diff --git a/code/jasmin/mlkem_avx2/fq.inc b/code/jasmin/mlkem_avx2/fq.inc deleted file mode 100644 index 4cb28a8e..00000000 --- a/code/jasmin/mlkem_avx2/fq.inc +++ /dev/null @@ -1,26 +0,0 @@ -.macro red16 r,x=12 -vpmulhw %ymm1,%ymm\r,%ymm\x -vpsraw $10,%ymm\x,%ymm\x -vpmullw %ymm0,%ymm\x,%ymm\x -vpsubw %ymm\x,%ymm\r,%ymm\r -.endm - -.macro csubq r,x=12 -vpsubw %ymm0,%ymm\r,%ymm\r -vpsraw $15,%ymm\r,%ymm\x -vpand %ymm0,%ymm\x,%ymm\x -vpaddw %ymm\x,%ymm\r,%ymm\r -.endm - -.macro caddq r,x=12 -vpsraw $15,%ymm\r,%ymm\x -vpand %ymm0,%ymm\x,%ymm\x -vpaddw %ymm\x,%ymm\r,%ymm\r -.endm - -.macro fqmulprecomp al,ah,b,x=12 -vpmullw %ymm\al,%ymm\b,%ymm\x -vpmulhw %ymm\ah,%ymm\b,%ymm\b -vpmulhw %ymm0,%ymm\x,%ymm\x -vpsubw %ymm\x,%ymm\b,%ymm\b -.endm diff --git a/code/jasmin/mlkem_avx2/gen_matrix.jinc.try0 b/code/jasmin/mlkem_avx2/gen_matrix.jinc.try0 deleted file mode 100644 index a13ea596..00000000 --- a/code/jasmin/mlkem_avx2/gen_matrix.jinc.try0 +++ /dev/null @@ -1,940 +0,0 @@ -/** -benchmarks with this file - - our / supercop -key 83712/ 79134 skylake -enc 96680/ 74866 -dec 83562/ 65006 - -key 74472/ 71588 haswell -enc 89524/ 72472 -dec 77096/ 61512 - -key 93730/ 91723 alderlake -enc 109251/ 85250 -dec 97006/ 73901 - -**/ - -require "params.jinc" -require "consts.jinc" -require "shuffle.jinc" -require "fips202.jinc" -require "fips202_4x.jinc" - -param int GENMATRIX_NBLOCKS = ((12*MLKEM_N/8*4096/MLKEM_Q + SHAKE128_RATE)/SHAKE128_RATE); -param int REJ_UNIFORM_AVX_BUFLEN = GENMATRIX_NBLOCKS * SHAKE128_RATE; - -param int USE_AVX2_REJECTION = 0; -param int USE_SQUEEZE_N = 0; - -u8[2048] ru_idx = {-1, -1, -1, -1, -1, -1, -1, -1, - 0, -1, -1, -1, -1, -1, -1, -1, - 2, -1, -1, -1, -1, -1, -1, -1, - 0, 2, -1, -1, -1, -1, -1, -1, - 4, -1, -1, -1, -1, -1, -1, -1, - 0, 4, -1, -1, -1, -1, -1, -1, - 2, 4, -1, -1, -1, -1, -1, -1, - 0, 2, 4, -1, -1, -1, -1, -1, - 6, -1, -1, -1, -1, -1, -1, -1, - 0, 6, -1, -1, -1, -1, -1, -1, - 2, 6, -1, -1, -1, -1, -1, -1, - 0, 2, 6, -1, -1, -1, -1, -1, - 4, 6, -1, -1, -1, -1, -1, -1, - 0, 4, 6, -1, -1, -1, -1, -1, - 2, 4, 6, -1, -1, -1, -1, -1, - 0, 2, 4, 6, -1, -1, -1, -1, - 8, -1, -1, -1, -1, -1, -1, -1, - 0, 8, -1, -1, -1, -1, -1, -1, - 2, 8, -1, -1, -1, -1, -1, -1, - 0, 2, 8, -1, -1, -1, -1, -1, - 4, 8, -1, -1, -1, -1, -1, -1, - 0, 4, 8, -1, -1, -1, -1, -1, - 2, 4, 8, -1, -1, -1, -1, -1, - 0, 2, 4, 8, -1, -1, -1, -1, - 6, 8, -1, -1, -1, -1, -1, -1, - 0, 6, 8, -1, -1, -1, -1, -1, - 2, 6, 8, -1, -1, -1, -1, -1, - 0, 2, 6, 8, -1, -1, -1, -1, - 4, 6, 8, -1, -1, -1, -1, -1, - 0, 4, 6, 8, -1, -1, -1, -1, - 2, 4, 6, 8, -1, -1, -1, -1, - 0, 2, 4, 6, 8, -1, -1, -1, - 10, -1, -1, -1, -1, -1, -1, -1, - 0, 10, -1, -1, -1, -1, -1, -1, - 2, 10, -1, -1, -1, -1, -1, -1, - 0, 2, 10, -1, -1, -1, -1, -1, - 4, 10, -1, -1, -1, -1, -1, -1, - 0, 4, 10, -1, -1, -1, -1, -1, - 2, 4, 10, -1, -1, -1, -1, -1, - 0, 2, 4, 10, -1, -1, -1, -1, - 6, 10, -1, -1, -1, -1, -1, -1, - 0, 6, 10, -1, -1, -1, -1, -1, - 2, 6, 10, -1, -1, -1, -1, -1, - 0, 2, 6, 10, -1, -1, -1, -1, - 4, 6, 10, -1, -1, -1, -1, -1, - 0, 4, 6, 10, -1, -1, -1, -1, - 2, 4, 6, 10, -1, -1, -1, -1, - 0, 2, 4, 6, 10, -1, -1, -1, - 8, 10, -1, -1, -1, -1, -1, -1, - 0, 8, 10, -1, -1, -1, -1, -1, - 2, 8, 10, -1, -1, -1, -1, -1, - 0, 2, 8, 10, -1, -1, -1, -1, - 4, 8, 10, -1, -1, -1, -1, -1, - 0, 4, 8, 10, -1, -1, -1, -1, - 2, 4, 8, 10, -1, -1, -1, -1, - 0, 2, 4, 8, 10, -1, -1, -1, - 6, 8, 10, -1, -1, -1, -1, -1, - 0, 6, 8, 10, -1, -1, -1, -1, - 2, 6, 8, 10, -1, -1, -1, -1, - 0, 2, 6, 8, 10, -1, -1, -1, - 4, 6, 8, 10, -1, -1, -1, -1, - 0, 4, 6, 8, 10, -1, -1, -1, - 2, 4, 6, 8, 10, -1, -1, -1, - 0, 2, 4, 6, 8, 10, -1, -1, - 12, -1, -1, -1, -1, -1, -1, -1, - 0, 12, -1, -1, -1, -1, -1, -1, - 2, 12, -1, -1, -1, -1, -1, -1, - 0, 2, 12, -1, -1, -1, -1, -1, - 4, 12, -1, -1, -1, -1, -1, -1, - 0, 4, 12, -1, -1, -1, -1, -1, - 2, 4, 12, -1, -1, -1, -1, -1, - 0, 2, 4, 12, -1, -1, -1, -1, - 6, 12, -1, -1, -1, -1, -1, -1, - 0, 6, 12, -1, -1, -1, -1, -1, - 2, 6, 12, -1, -1, -1, -1, -1, - 0, 2, 6, 12, -1, -1, -1, -1, - 4, 6, 12, -1, -1, -1, -1, -1, - 0, 4, 6, 12, -1, -1, -1, -1, - 2, 4, 6, 12, -1, -1, -1, -1, - 0, 2, 4, 6, 12, -1, -1, -1, - 8, 12, -1, -1, -1, -1, -1, -1, - 0, 8, 12, -1, -1, -1, -1, -1, - 2, 8, 12, -1, -1, -1, -1, -1, - 0, 2, 8, 12, -1, -1, -1, -1, - 4, 8, 12, -1, -1, -1, -1, -1, - 0, 4, 8, 12, -1, -1, -1, -1, - 2, 4, 8, 12, -1, -1, -1, -1, - 0, 2, 4, 8, 12, -1, -1, -1, - 6, 8, 12, -1, -1, -1, -1, -1, - 0, 6, 8, 12, -1, -1, -1, -1, - 2, 6, 8, 12, -1, -1, -1, -1, - 0, 2, 6, 8, 12, -1, -1, -1, - 4, 6, 8, 12, -1, -1, -1, -1, - 0, 4, 6, 8, 12, -1, -1, -1, - 2, 4, 6, 8, 12, -1, -1, -1, - 0, 2, 4, 6, 8, 12, -1, -1, - 10, 12, -1, -1, -1, -1, -1, -1, - 0, 10, 12, -1, -1, -1, -1, -1, - 2, 10, 12, -1, -1, -1, -1, -1, - 0, 2, 10, 12, -1, -1, -1, -1, - 4, 10, 12, -1, -1, -1, -1, -1, - 0, 4, 10, 12, -1, -1, -1, -1, - 2, 4, 10, 12, -1, -1, -1, -1, - 0, 2, 4, 10, 12, -1, -1, -1, - 6, 10, 12, -1, -1, -1, -1, -1, - 0, 6, 10, 12, -1, -1, -1, -1, - 2, 6, 10, 12, -1, -1, -1, -1, - 0, 2, 6, 10, 12, -1, -1, -1, - 4, 6, 10, 12, -1, -1, -1, -1, - 0, 4, 6, 10, 12, -1, -1, -1, - 2, 4, 6, 10, 12, -1, -1, -1, - 0, 2, 4, 6, 10, 12, -1, -1, - 8, 10, 12, -1, -1, -1, -1, -1, - 0, 8, 10, 12, -1, -1, -1, -1, - 2, 8, 10, 12, -1, -1, -1, -1, - 0, 2, 8, 10, 12, -1, -1, -1, - 4, 8, 10, 12, -1, -1, -1, -1, - 0, 4, 8, 10, 12, -1, -1, -1, - 2, 4, 8, 10, 12, -1, -1, -1, - 0, 2, 4, 8, 10, 12, -1, -1, - 6, 8, 10, 12, -1, -1, -1, -1, - 0, 6, 8, 10, 12, -1, -1, -1, - 2, 6, 8, 10, 12, -1, -1, -1, - 0, 2, 6, 8, 10, 12, -1, -1, - 4, 6, 8, 10, 12, -1, -1, -1, - 0, 4, 6, 8, 10, 12, -1, -1, - 2, 4, 6, 8, 10, 12, -1, -1, - 0, 2, 4, 6, 8, 10, 12, -1, - 14, -1, -1, -1, -1, -1, -1, -1, - 0, 14, -1, -1, -1, -1, -1, -1, - 2, 14, -1, -1, -1, -1, -1, -1, - 0, 2, 14, -1, -1, -1, -1, -1, - 4, 14, -1, -1, -1, -1, -1, -1, - 0, 4, 14, -1, -1, -1, -1, -1, - 2, 4, 14, -1, -1, -1, -1, -1, - 0, 2, 4, 14, -1, -1, -1, -1, - 6, 14, -1, -1, -1, -1, -1, -1, - 0, 6, 14, -1, -1, -1, -1, -1, - 2, 6, 14, -1, -1, -1, -1, -1, - 0, 2, 6, 14, -1, -1, -1, -1, - 4, 6, 14, -1, -1, -1, -1, -1, - 0, 4, 6, 14, -1, -1, -1, -1, - 2, 4, 6, 14, -1, -1, -1, -1, - 0, 2, 4, 6, 14, -1, -1, -1, - 8, 14, -1, -1, -1, -1, -1, -1, - 0, 8, 14, -1, -1, -1, -1, -1, - 2, 8, 14, -1, -1, -1, -1, -1, - 0, 2, 8, 14, -1, -1, -1, -1, - 4, 8, 14, -1, -1, -1, -1, -1, - 0, 4, 8, 14, -1, -1, -1, -1, - 2, 4, 8, 14, -1, -1, -1, -1, - 0, 2, 4, 8, 14, -1, -1, -1, - 6, 8, 14, -1, -1, -1, -1, -1, - 0, 6, 8, 14, -1, -1, -1, -1, - 2, 6, 8, 14, -1, -1, -1, -1, - 0, 2, 6, 8, 14, -1, -1, -1, - 4, 6, 8, 14, -1, -1, -1, -1, - 0, 4, 6, 8, 14, -1, -1, -1, - 2, 4, 6, 8, 14, -1, -1, -1, - 0, 2, 4, 6, 8, 14, -1, -1, - 10, 14, -1, -1, -1, -1, -1, -1, - 0, 10, 14, -1, -1, -1, -1, -1, - 2, 10, 14, -1, -1, -1, -1, -1, - 0, 2, 10, 14, -1, -1, -1, -1, - 4, 10, 14, -1, -1, -1, -1, -1, - 0, 4, 10, 14, -1, -1, -1, -1, - 2, 4, 10, 14, -1, -1, -1, -1, - 0, 2, 4, 10, 14, -1, -1, -1, - 6, 10, 14, -1, -1, -1, -1, -1, - 0, 6, 10, 14, -1, -1, -1, -1, - 2, 6, 10, 14, -1, -1, -1, -1, - 0, 2, 6, 10, 14, -1, -1, -1, - 4, 6, 10, 14, -1, -1, -1, -1, - 0, 4, 6, 10, 14, -1, -1, -1, - 2, 4, 6, 10, 14, -1, -1, -1, - 0, 2, 4, 6, 10, 14, -1, -1, - 8, 10, 14, -1, -1, -1, -1, -1, - 0, 8, 10, 14, -1, -1, -1, -1, - 2, 8, 10, 14, -1, -1, -1, -1, - 0, 2, 8, 10, 14, -1, -1, -1, - 4, 8, 10, 14, -1, -1, -1, -1, - 0, 4, 8, 10, 14, -1, -1, -1, - 2, 4, 8, 10, 14, -1, -1, -1, - 0, 2, 4, 8, 10, 14, -1, -1, - 6, 8, 10, 14, -1, -1, -1, -1, - 0, 6, 8, 10, 14, -1, -1, -1, - 2, 6, 8, 10, 14, -1, -1, -1, - 0, 2, 6, 8, 10, 14, -1, -1, - 4, 6, 8, 10, 14, -1, -1, -1, - 0, 4, 6, 8, 10, 14, -1, -1, - 2, 4, 6, 8, 10, 14, -1, -1, - 0, 2, 4, 6, 8, 10, 14, -1, - 12, 14, -1, -1, -1, -1, -1, -1, - 0, 12, 14, -1, -1, -1, -1, -1, - 2, 12, 14, -1, -1, -1, -1, -1, - 0, 2, 12, 14, -1, -1, -1, -1, - 4, 12, 14, -1, -1, -1, -1, -1, - 0, 4, 12, 14, -1, -1, -1, -1, - 2, 4, 12, 14, -1, -1, -1, -1, - 0, 2, 4, 12, 14, -1, -1, -1, - 6, 12, 14, -1, -1, -1, -1, -1, - 0, 6, 12, 14, -1, -1, -1, -1, - 2, 6, 12, 14, -1, -1, -1, -1, - 0, 2, 6, 12, 14, -1, -1, -1, - 4, 6, 12, 14, -1, -1, -1, -1, - 0, 4, 6, 12, 14, -1, -1, -1, - 2, 4, 6, 12, 14, -1, -1, -1, - 0, 2, 4, 6, 12, 14, -1, -1, - 8, 12, 14, -1, -1, -1, -1, -1, - 0, 8, 12, 14, -1, -1, -1, -1, - 2, 8, 12, 14, -1, -1, -1, -1, - 0, 2, 8, 12, 14, -1, -1, -1, - 4, 8, 12, 14, -1, -1, -1, -1, - 0, 4, 8, 12, 14, -1, -1, -1, - 2, 4, 8, 12, 14, -1, -1, -1, - 0, 2, 4, 8, 12, 14, -1, -1, - 6, 8, 12, 14, -1, -1, -1, -1, - 0, 6, 8, 12, 14, -1, -1, -1, - 2, 6, 8, 12, 14, -1, -1, -1, - 0, 2, 6, 8, 12, 14, -1, -1, - 4, 6, 8, 12, 14, -1, -1, -1, - 0, 4, 6, 8, 12, 14, -1, -1, - 2, 4, 6, 8, 12, 14, -1, -1, - 0, 2, 4, 6, 8, 12, 14, -1, - 10, 12, 14, -1, -1, -1, -1, -1, - 0, 10, 12, 14, -1, -1, -1, -1, - 2, 10, 12, 14, -1, -1, -1, -1, - 0, 2, 10, 12, 14, -1, -1, -1, - 4, 10, 12, 14, -1, -1, -1, -1, - 0, 4, 10, 12, 14, -1, -1, -1, - 2, 4, 10, 12, 14, -1, -1, -1, - 0, 2, 4, 10, 12, 14, -1, -1, - 6, 10, 12, 14, -1, -1, -1, -1, - 0, 6, 10, 12, 14, -1, -1, -1, - 2, 6, 10, 12, 14, -1, -1, -1, - 0, 2, 6, 10, 12, 14, -1, -1, - 4, 6, 10, 12, 14, -1, -1, -1, - 0, 4, 6, 10, 12, 14, -1, -1, - 2, 4, 6, 10, 12, 14, -1, -1, - 0, 2, 4, 6, 10, 12, 14, -1, - 8, 10, 12, 14, -1, -1, -1, -1, - 0, 8, 10, 12, 14, -1, -1, -1, - 2, 8, 10, 12, 14, -1, -1, -1, - 0, 2, 8, 10, 12, 14, -1, -1, - 4, 8, 10, 12, 14, -1, -1, -1, - 0, 4, 8, 10, 12, 14, -1, -1, - 2, 4, 8, 10, 12, 14, -1, -1, - 0, 2, 4, 8, 10, 12, 14, -1, - 6, 8, 10, 12, 14, -1, -1, -1, - 0, 6, 8, 10, 12, 14, -1, -1, - 2, 6, 8, 10, 12, 14, -1, -1, - 0, 2, 6, 8, 10, 12, 14, -1, - 4, 6, 8, 10, 12, 14, -1, -1, - 0, 4, 6, 8, 10, 12, 14, -1, - 2, 4, 6, 8, 10, 12, 14, -1, - 0, 2, 4, 6, 8, 10, 12, 14}; - -inline -fn __shake128_squeezenblocks(stack u64[25] state, stack u8[REJ_UNIFORM_AVX_BUFLEN] out) - -> stack u64[25], stack u8[REJ_UNIFORM_AVX_BUFLEN] -{ - inline int i; - - for i = 0 to GENMATRIX_NBLOCKS - { - state, out[i*SHAKE128_RATE:SHAKE128_RATE] = _shake128_squeezeblock(state, out[i*SHAKE128_RATE:SHAKE128_RATE]); - } - return state, out; -} - -inline -fn __shake128_squeezenblocks4x(reg ptr u256[25] state, reg ptr u8[REJ_UNIFORM_AVX_BUFLEN] h0 h1 h2 h3) - -> reg ptr u256[25], reg ptr u8[REJ_UNIFORM_AVX_BUFLEN], reg ptr u8[REJ_UNIFORM_AVX_BUFLEN], reg ptr u8[REJ_UNIFORM_AVX_BUFLEN], reg ptr u8[REJ_UNIFORM_AVX_BUFLEN] -{ - inline int i; - - for i = 0 to GENMATRIX_NBLOCKS - { - state, h0[i*SHAKE128_RATE:SHAKE128_RATE], h1[i*SHAKE128_RATE:SHAKE128_RATE], h2[i*SHAKE128_RATE:SHAKE128_RATE], h3[i*SHAKE128_RATE:SHAKE128_RATE] = __shake128_squeezeblock4x(state, h0[i*SHAKE128_RATE:SHAKE128_RATE], h1[i*SHAKE128_RATE:SHAKE128_RATE], h2[i*SHAKE128_RATE:SHAKE128_RATE], h3[i*SHAKE128_RATE:SHAKE128_RATE]); - } - - return state, h0, h1, h2, h3; -} - -inline -fn __rej_uniform(reg ptr u16[MLKEM_N] rp, reg u64 offset, reg ptr u8[SHAKE128_RATE] buf, inline int buflen) -> reg u64, stack u16[MLKEM_N] -{ - reg u16 val0 val1; - reg u16 t; - reg u64 pos ctr; - reg u8 fl1 fl2; - reg bool cf zf b; - - ctr = offset; - pos = 0; - - _, cf, _, _, zf = #CMP_64(ctr, MLKEM_N - 1); - fl1 = #SETcc(cf || zf); //SETBE - - _, cf, _, _, zf = #CMP_64(pos, buflen - 3); - fl2 = #SETcc(cf || zf); //SETBE - - _, _, _, _, b = #TEST_8(fl1, fl2); - - while(!b) - { - val0 = (16u)buf[(int)pos]; - pos += 1; - - t = (16u)buf[(int)pos]; - val1 = t; - val1 >>= 4; - - t &= 0x0F; - t <<= 8; - val0 |= t; - pos += 1; - - t = (16u)buf[(int)pos]; - t <<= 4; - val1 |= t; - pos += 1; - - if(val0 < MLKEM_Q) - { - rp[(int)ctr] = val0; - ctr += 1; - } - - if(ctr < MLKEM_N) - { - if(val1 < MLKEM_Q) - { - rp[(int)ctr] = val1; - ctr += 1; - } - } - - _, cf, _, _, zf = #CMP_64(ctr, MLKEM_N - 1); - fl1 = #SETcc(cf || zf); //SETBE - - _, cf, _, _, zf = #CMP_64(pos, buflen - 3); - fl2 = #SETcc(cf || zf); //SETBE - - _, _, _, _, b = #TEST_8(fl1, fl2); - } - - return ctr, rp; -} - -fn _rej_uniformn(reg ptr u16[MLKEM_N] rp, reg ptr u8[REJ_UNIFORM_AVX_BUFLEN] buf) -> reg u64, reg ptr u16[MLKEM_N] -{ - reg u16 val0 val1; - reg u16 t; - reg u64 pos ctr; - reg u8 fl1 fl2; - reg bool b; - - ctr = 0; - pos = 0; - - ?{ "==" = b } = #CMP_64(pos, 1); - - while(!b) - { - val0 = (16u)buf[(int)pos]; - pos += 1; - - t = (16u)buf[(int)pos]; - val1 = t; - val1 >>= 4; - - t &= 0x0F; - t <<= 8; - val0 |= t; - pos += 1; - - t = (16u)buf[(int)pos]; - t <<= 4; - val1 |= t; - pos += 1; - - if(val0 < MLKEM_Q) - { - rp[(int)ctr] = val0; - ctr += 1; - } - - if(ctr < MLKEM_N) - { - if(val1 < MLKEM_Q) - { - rp[(int)ctr] = val1; - ctr += 1; - } - } - - ?{ "<=u" = b } = #CMP_64(ctr, MLKEM_N - 1); - fl1 = #SETcc(b); - - ?{ "<=u" = b } = #CMP_64(pos, REJ_UNIFORM_AVX_BUFLEN - 3); - fl2 = #SETcc(b); - - _, _, _, _, b = #TEST_8(fl1, fl2); - } - - return ctr, rp; -} - -u8 ru_ones_s = 1; -u16 ru_mask_s = 0x0FFF; -u8[32] ru_idx8_s = {0, 1, 1, 2, 3, 4, 4, 5, - 6, 7, 7, 8, 9, 10, 10, 11, - 4, 5, 5, 6, 7, 8, 8, 9, - 10, 11, 11, 12, 13, 14, 14, 15}; - -fn _rej_uniform_avx(reg ptr u16[MLKEM_N] rp, reg ptr u8[REJ_UNIFORM_AVX_BUFLEN] buf) -> reg u64, reg ptr u16[MLKEM_N] -{ - reg u256 f0 f1 g0 g1 g2 g3; - reg u256 bound ones mask idx8; - reg u128 f t l h; - reg u64 pos ctr t64 t64_1 t64_2 t64_3; - reg u64 good; - reg u16 val0 val1 t16; - reg ptr u8[2048] idxp; - reg u8 fl1 fl2; - reg bool cf zf b; - - idxp = ru_idx; - - bound = jqx16[u256 0]; - ctr = 0; - pos = 0; - ones = #VPBROADCAST_32u8(ru_ones_s); - mask = #VPBROADCAST_16u16(ru_mask_s); - idx8 = ru_idx8_s[u256 0]; - - _, cf, _, _, zf = #CMP_64(ctr, MLKEM_N - 32); - fl1 = #SETcc(cf || zf); - - _, cf, _, _, zf = #CMP_64(pos, REJ_UNIFORM_AVX_BUFLEN - 48); - fl2 = #SETcc(cf || zf); - - _, _, _, _, b = #TEST_8(fl1, fl2); - - while(!b) - { - f0 = #VPERMQ(buf.[u256 (int)pos], 0x94); - f1 = #VPERMQ(buf.[u256 24 + (int)pos], 0x94); - f0 = #VPSHUFB_256(f0, idx8); - f1 = #VPSHUFB_256(f1, idx8); - g0 = #VPSRL_16u16(f0, 4); - g1 = #VPSRL_16u16(f1, 4); - f0 = #VPBLEND_16u16(f0, g0, 0xAA); - f1 = #VPBLEND_16u16(f1, g1, 0xAA); - f0 = #VPAND_256(f0, mask); - f1 = #VPAND_256(f1, mask); - - g0 = #VPCMPGT_16u16(bound, f0); - g1 = #VPCMPGT_16u16(bound, f1); - - g0 = #VPACKSS_16u16(g0, g1); - good = #VPMOVMSKB_u256u64(g0); - - t64 = good; - t64 &= 0xFF; - g0 = (256u) #VMOV(idxp[u64 (int)t64]); - - t64_1 = good; - t64_1 >>= 16; - t64_1 &= 0xFF; - l = #VMOV(idxp[u64 (int)t64_1]); - - t64_2 = good; - t64_2 >>= 8; - t64_2 &= 0xFF; - g1 = (256u) #VMOV(idxp[u64 (int)t64_2]); - - t64_3 = good; - t64_3 >>= 24; - t64_3 &= 0xFF; - h = #VMOV(idxp[u64 (int)t64_3]); - - g0 = #VINSERTI128(g0, l, 1); - - _, _, _, _, _, t64 = #POPCNT_64(t64); - _, _, _, _, _, t64_1 = #POPCNT_64(t64_1); - t64 += ctr; - - g1 = #VINSERTI128(g1, h, 1); - - t64_1 += t64; - _, _, _, _, _, t64_2 = #POPCNT_64(t64_2); - t64_2 += t64_1; - _, _, _, _, _, t64_3 = #POPCNT_64(t64_3); - t64_3 += t64_2; - - g2 = #VPADD_32u8(g0, ones); - g0 = #VPUNPCKL_32u8(g0, g2); - g3 = #VPADD_32u8(g1, ones); - g1 = #VPUNPCKL_32u8(g1, g3); - - f0 = #VPSHUFB_256(f0, g0); - f1 = #VPSHUFB_256(f1, g1); - - rp.[u128 2*(int)ctr] = (128u)f0; - rp.[u128 2*(int)t64] = #VEXTRACTI128(f0, 1); - rp.[u128 2*(int)t64_1] = (128u)f1; - rp.[u128 2*(int)t64_2] = #VEXTRACTI128(f1, 1); - - ctr = t64_3; - - _, cf, _, _, zf = #CMP_64(ctr, MLKEM_N - 32); - fl1 = #SETcc(cf || zf); - - pos += 48; - _, cf, _, _, zf = #CMP_64(pos, REJ_UNIFORM_AVX_BUFLEN - 48); - fl2 = #SETcc(cf || zf); - - _, _, _, _, b = #TEST_8(fl1, fl2); - } - - _, cf, _, _, zf = #CMP_64(ctr, MLKEM_N - 8); - fl1 = #SETcc(cf || zf); - - _, cf, _, _, zf = #CMP_64(pos, REJ_UNIFORM_AVX_BUFLEN - 12); - fl2 = #SETcc(cf || zf); - - _, _, _, _, b = #TEST_8(fl1, fl2); - - t64 = 0x5555; - while(!b) - { - f = buf.[u128 (int)pos]; - f = #VPSHUFB_128(f, idx8); - t = #VPSRL_8u16(f, 4); - f = #VPBLEND_8u16(f, t, 0xAA); - f = #VPAND_128(f, mask); - - t = #VPCMPGT_8u16(bound, f); - good = #VPMOVMSKB_u128u64(t); - - good = #PEXT_64(good, t64); - l = #VMOV(idxp[u64 (int)good]); - _, _, _, _, _, good = #POPCNT_64(good); - - h = #VPADD_16u8(l, ones); - l = #VPUNPCKL_16u8(l, h); - f = #VPSHUFB_128(f, l); - - rp.[u128 2*(int)ctr] = f; - ctr += good; - - pos += 12; - _, cf, _, _, zf = #CMP_64(ctr, MLKEM_N - 8); - fl1 = #SETcc(cf || zf); - - _, cf, _, _, zf = #CMP_64(pos, REJ_UNIFORM_AVX_BUFLEN - 12); - fl2 = #SETcc(cf || zf); - - _, _, _, _, b = #TEST_8(fl1, fl2); - } - - _, cf, _, _, zf = #CMP_64(ctr, MLKEM_N - 1); - fl1 = #SETcc(cf || zf); - - _, cf, _, _, zf = #CMP_64(pos, REJ_UNIFORM_AVX_BUFLEN - 3); - fl2 = #SETcc(cf || zf); - - _, _, _, _, b = #TEST_8(fl1, fl2); - - while(!b) - { - val0 = (16u)buf[(int)pos]; - pos += 1; - t16 = (16u)buf[(int)pos]; - pos += 1; - val1 = t16; - - t16 <<= 8; - val0 |= t16; - val0 &= 0xFFF; - - val1 >>= 4; - t16 = (16u)buf[(int)pos]; - pos += 1; - t16 <<= 4; - val1 |= t16; - - if(val0 < MLKEM_Q) - { - rp[(int)ctr] = val0; - ctr += 1; - } - if(val1 < MLKEM_Q) - { - if(ctr < MLKEM_N) - { - rp[(int)ctr] = val1; - ctr += 1; - } - } - - _, cf, _, _, zf = #CMP_64(ctr, MLKEM_N - 1); - fl1 = #SETcc(cf || zf); //SETBE - - _, cf, _, _, zf = #CMP_64(pos, REJ_UNIFORM_AVX_BUFLEN - 3); - fl2 = #SETcc(cf || zf); //SETBE - - _, _, _, _, b = #TEST_8(fl1, fl2); - } - - return ctr, rp; -} - - -inline fn __r2s(reg u256 f) -> stack u256 { - stack u256 fs; - fs = f; - return f; -} - - -inline fn __s2r(stack u256 fs) -> reg u256 { - reg u256 f; - f = fs; - return f; -} - - -inline -fn __gen_matrix(stack u8[MLKEM_SYMBYTES] seed, inline int transposed) -> stack u16[MLKEM_K*MLKEM_VECN] -{ - stack u8[REJ_UNIFORM_AVX_BUFLEN] buf0; - stack u8[REJ_UNIFORM_AVX_BUFLEN] buf1; - stack u8[REJ_UNIFORM_AVX_BUFLEN] buf2; - stack u8[REJ_UNIFORM_AVX_BUFLEN] buf3; - stack u256[25] state; - stack u16[MLKEM_K*MLKEM_VECN] rr; - stack u256 fs; - reg u256 f; - reg u64 ctr0 ctr1 ctr2 ctr3 tmp; - stack u64 ctr0_s; - reg u8 flg0 flg1 bflg; - reg bool cf zf; - - inline int i, j; - - f = seed[u256 0]; - buf0[u256 0] = f; - buf1[u256 0] = f; - buf2[u256 0] = f; - buf3[u256 0] = f; - fs = __r2s(f); - - if(transposed == 1) - { - buf0[MLKEM_SYMBYTES] = 0; - buf0[MLKEM_SYMBYTES+1] = 0; - buf1[MLKEM_SYMBYTES] = 0; - buf1[MLKEM_SYMBYTES+1] = 1; - buf2[MLKEM_SYMBYTES] = 0; - buf2[MLKEM_SYMBYTES+1] = 2; - buf3[MLKEM_SYMBYTES] = 1; - buf3[MLKEM_SYMBYTES+1] = 0; - } - else - { - buf0[MLKEM_SYMBYTES] = 0; - buf0[MLKEM_SYMBYTES+1] = 0; - buf1[MLKEM_SYMBYTES] = 1; - buf1[MLKEM_SYMBYTES+1] = 0; - buf2[MLKEM_SYMBYTES] = 2; - buf2[MLKEM_SYMBYTES+1] = 0; - buf3[MLKEM_SYMBYTES] = 0; - buf3[MLKEM_SYMBYTES+1] = 1; - } - - state = _shake128_absorb4x_34(state, buf0[0:34], buf1[0:34], buf2[0:34], buf3[0:34]); - - if ( USE_SQUEEZE_N == 1 ) { - - state, buf0, buf1, buf2, buf3 = __shake128_squeezenblocks4x(state, buf0, buf1, buf2, buf3); - - if (USE_AVX2_REJECTION == 1) { - tmp, rr[0*MLKEM_VECN+0*MLKEM_N:MLKEM_N] = _rej_uniform_avx(rr[0*MLKEM_VECN+0*MLKEM_N:MLKEM_N], buf0); - ctr0 = tmp; - tmp, rr[0*MLKEM_VECN+1*MLKEM_N:MLKEM_N] = _rej_uniform_avx(rr[0*MLKEM_VECN+1*MLKEM_N:MLKEM_N], buf1); - ctr1 = tmp; - tmp, rr[0*MLKEM_VECN+2*MLKEM_N:MLKEM_N] = _rej_uniform_avx(rr[0*MLKEM_VECN+2*MLKEM_N:MLKEM_N], buf2); - ctr2 = tmp; - ctr3, rr[1*MLKEM_VECN+0*MLKEM_N:MLKEM_N] = _rej_uniform_avx(rr[1*MLKEM_VECN+0*MLKEM_N:MLKEM_N], buf3); - } else { - tmp, rr[0*MLKEM_VECN+0*MLKEM_N:MLKEM_N] = _rej_uniformn(rr[0*MLKEM_VECN+0*MLKEM_N:MLKEM_N], buf0); - ctr0 = tmp; - tmp, rr[0*MLKEM_VECN+1*MLKEM_N:MLKEM_N] = _rej_uniformn(rr[0*MLKEM_VECN+1*MLKEM_N:MLKEM_N], buf1); - ctr1 = tmp; - tmp, rr[0*MLKEM_VECN+2*MLKEM_N:MLKEM_N] = _rej_uniformn(rr[0*MLKEM_VECN+2*MLKEM_N:MLKEM_N], buf2); - ctr2 = tmp; - ctr3, rr[1*MLKEM_VECN+0*MLKEM_N:MLKEM_N] = _rej_uniformn(rr[1*MLKEM_VECN+0*MLKEM_N:MLKEM_N], buf3); - } - - _, cf, _, _, zf = #CMP_64(ctr0, MLKEM_N - 1); - flg0 = #SETcc(cf || zf); //SETBE - - _, cf, _, _, zf = #CMP_64(ctr1, MLKEM_N - 1); - flg1 = #SETcc(cf || zf); - - _, _, _, _, _, bflg = #OR_8(flg0, flg1); - - _, cf, _, _, zf = #CMP_64(ctr2, MLKEM_N - 1); - flg0 = #SETcc(cf || zf); - - _, cf, _, _, zf = #CMP_64(ctr3, MLKEM_N - 1); - flg1 = #SETcc(cf || zf); - - _, _, _, _, _, flg0 = #OR_8(flg0, flg1); - _, _, _, _, _, bflg = #OR_8(flg0, bflg); - - } - else - { - ctr0 = 0; - ctr1 = 0; - ctr2 = 0; - ctr3 = 0; - flg0 = 1; - bflg = 1; - } - - while(bflg != 0) { - state, buf0[0:SHAKE128_RATE], buf1[0:SHAKE128_RATE], buf2[0:SHAKE128_RATE], buf3[0:SHAKE128_RATE] = __shake128_squeezeblock4x(state, buf0[0:SHAKE128_RATE], buf1[0:SHAKE128_RATE], buf2[0:SHAKE128_RATE], buf3[0:SHAKE128_RATE]); - - ctr0, rr[0*MLKEM_VECN+0*MLKEM_N:MLKEM_N] = __rej_uniform(rr[0*MLKEM_VECN+0*MLKEM_N:MLKEM_N], ctr0, buf0[0:SHAKE128_RATE], SHAKE128_RATE); - ctr1, rr[0*MLKEM_VECN+1*MLKEM_N:MLKEM_N] = __rej_uniform(rr[0*MLKEM_VECN+1*MLKEM_N:MLKEM_N], ctr1, buf1[0:SHAKE128_RATE], SHAKE128_RATE); - ctr2, rr[0*MLKEM_VECN+2*MLKEM_N:MLKEM_N] = __rej_uniform(rr[0*MLKEM_VECN+2*MLKEM_N:MLKEM_N], ctr2, buf2[0:SHAKE128_RATE], SHAKE128_RATE); - ctr3, rr[1*MLKEM_VECN+0*MLKEM_N:MLKEM_N] = __rej_uniform(rr[1*MLKEM_VECN+0*MLKEM_N:MLKEM_N], ctr3, buf3[0:SHAKE128_RATE], SHAKE128_RATE); - - _, cf, _, _, zf = #CMP_64(ctr0, MLKEM_N - 1); - flg0 = #SETcc(cf || zf); - - _, cf, _, _, zf = #CMP_64(ctr1, MLKEM_N - 1); - flg1 = #SETcc(cf || zf); - - _, _, _, _, _, bflg = #OR_8(flg0, flg1); - - _, cf, _, _, zf = #CMP_64(ctr2, MLKEM_N - 1); - flg0 = #SETcc(cf || zf); - - _, cf, _, _, zf = #CMP_64(ctr3, MLKEM_N - 1); - flg1 = #SETcc(cf || zf); - - _, _, _, _, _, flg0 = #OR_8(flg0, flg1); - _, _, _, _, _, bflg = #OR_8(flg0, bflg); - } - - f = __s2r(fs); - buf0[u256 0] = f; - buf1[u256 0] = f; - buf2[u256 0] = f; - buf3[u256 0] = f; - fs = __r2s(f); - - if(transposed == 1) - { - buf0[MLKEM_SYMBYTES] = 1; - buf0[MLKEM_SYMBYTES+1] = 1; - buf1[MLKEM_SYMBYTES] = 1; - buf1[MLKEM_SYMBYTES+1] = 2; - buf2[MLKEM_SYMBYTES] = 2; - buf2[MLKEM_SYMBYTES+1] = 0; - buf3[MLKEM_SYMBYTES] = 2; - buf3[MLKEM_SYMBYTES+1] = 1; - } - else - { - buf0[MLKEM_SYMBYTES] = 1; - buf0[MLKEM_SYMBYTES+1] = 1; - buf1[MLKEM_SYMBYTES] = 2; - buf1[MLKEM_SYMBYTES+1] = 1; - buf2[MLKEM_SYMBYTES] = 0; - buf2[MLKEM_SYMBYTES+1] = 2; - buf3[MLKEM_SYMBYTES] = 1; - buf3[MLKEM_SYMBYTES+1] = 2; - } - - state = _shake128_absorb4x_34(state, buf0[0:34], buf1[0:34], buf2[0:34], buf3[0:34]); - - if ( USE_SQUEEZE_N == 1 ) { - - state, buf0, buf1, buf2, buf3 = __shake128_squeezenblocks4x(state, buf0, buf1, buf2, buf3); - - if (USE_AVX2_REJECTION == 1) { - tmp, rr[1*MLKEM_VECN+1*MLKEM_N:MLKEM_N] = _rej_uniform_avx(rr[1*MLKEM_VECN+1*MLKEM_N:MLKEM_N], buf0); - ctr0 = tmp; - tmp, rr[1*MLKEM_VECN+2*MLKEM_N:MLKEM_N] = _rej_uniform_avx(rr[1*MLKEM_VECN+2*MLKEM_N:MLKEM_N], buf1); - ctr1 = tmp; - tmp, rr[2*MLKEM_VECN+0*MLKEM_N:MLKEM_N] = _rej_uniform_avx(rr[2*MLKEM_VECN+0*MLKEM_N:MLKEM_N], buf2); - ctr2 = tmp; - ctr3, rr[2*MLKEM_VECN+1*MLKEM_N:MLKEM_N] = _rej_uniform_avx(rr[2*MLKEM_VECN+1*MLKEM_N:MLKEM_N], buf3); - } else { - tmp, rr[1*MLKEM_VECN+1*MLKEM_N:MLKEM_N] = _rej_uniformn(rr[1*MLKEM_VECN+1*MLKEM_N:MLKEM_N], buf0); - ctr0 = tmp; - tmp, rr[1*MLKEM_VECN+2*MLKEM_N:MLKEM_N] = _rej_uniformn(rr[1*MLKEM_VECN+2*MLKEM_N:MLKEM_N], buf1); - ctr1 = tmp; - tmp, rr[2*MLKEM_VECN+0*MLKEM_N:MLKEM_N] = _rej_uniformn(rr[2*MLKEM_VECN+0*MLKEM_N:MLKEM_N], buf2); - ctr2 = tmp; - ctr3, rr[2*MLKEM_VECN+1*MLKEM_N:MLKEM_N] = _rej_uniformn(rr[2*MLKEM_VECN+1*MLKEM_N:MLKEM_N], buf3); - } - - _, cf, _, _, zf = #CMP_64(ctr0, MLKEM_N - 1); - flg0 = #SETcc(cf || zf); - - _, cf, _, _, zf = #CMP_64(ctr1, MLKEM_N - 1); - flg1 = #SETcc(cf || zf); - - _, _, _, _, _, bflg = #OR_8(flg0, flg1); - - _, cf, _, _, zf = #CMP_64(ctr2, MLKEM_N - 1); - flg0 = #SETcc(cf || zf); - - _, cf, _, _, zf = #CMP_64(ctr3, MLKEM_N - 1); - flg1 = #SETcc(cf || zf); - - _, _, _, _, _, flg0 = #OR_8(flg0, flg1); - _, _, _, _, _, bflg = #OR_8(flg0, bflg); - - } else { - ctr0 = 0; - ctr1 = 0; - ctr2 = 0; - ctr3 = 0; - flg0 = 1; - bflg = 1; - } - - - - while(bflg != 0) { - state, buf0[0:SHAKE128_RATE], buf1[0:SHAKE128_RATE], buf2[0:SHAKE128_RATE], buf3[0:SHAKE128_RATE] = __shake128_squeezeblock4x(state, buf0[0:SHAKE128_RATE], buf1[0:SHAKE128_RATE], buf2[0:SHAKE128_RATE], buf3[0:SHAKE128_RATE]); - - ctr0, rr[1*MLKEM_VECN+1*MLKEM_N:MLKEM_N] = __rej_uniform(rr[1*MLKEM_VECN+1*MLKEM_N:MLKEM_N], ctr0, buf0[0:SHAKE128_RATE], SHAKE128_RATE); - ctr1, rr[1*MLKEM_VECN+2*MLKEM_N:MLKEM_N] = __rej_uniform(rr[1*MLKEM_VECN+2*MLKEM_N:MLKEM_N], ctr1, buf1[0:SHAKE128_RATE], SHAKE128_RATE); - ctr2, rr[2*MLKEM_VECN+0*MLKEM_N:MLKEM_N] = __rej_uniform(rr[2*MLKEM_VECN+0*MLKEM_N:MLKEM_N], ctr2, buf2[0:SHAKE128_RATE], SHAKE128_RATE); - ctr3, rr[2*MLKEM_VECN+1*MLKEM_N:MLKEM_N] = __rej_uniform(rr[2*MLKEM_VECN+1*MLKEM_N:MLKEM_N], ctr3, buf3[0:SHAKE128_RATE], SHAKE128_RATE); - - _, cf, _, _, zf = #CMP_64(ctr0, MLKEM_N - 1); - flg0 = #SETcc(cf || zf); - - _, cf, _, _, zf = #CMP_64(ctr1, MLKEM_N - 1); - flg1 = #SETcc(cf || zf); - - _, _, _, _, _, bflg = #OR_8(flg0, flg1); - - _, cf, _, _, zf = #CMP_64(ctr2, MLKEM_N - 1); - flg0 = #SETcc(cf || zf); - - _, cf, _, _, zf = #CMP_64(ctr3, MLKEM_N - 1); - flg1 = #SETcc(cf || zf); - - _, _, _, _, _, flg0 = #OR_8(flg0, flg1); - _, _, _, _, _, bflg = #OR_8(flg0, bflg); - } - - f = __s2r(fs); - buf0[u256 0] = f; - buf0[MLKEM_SYMBYTES] = 2; - buf0[MLKEM_SYMBYTES+1] = 2; - - state[u64 0:25] = _shake128_absorb34(state[u64 0:25], buf0[0:34]); - - if ( USE_SQUEEZE_N == 1 ) { - - state[u64 0:25], buf0 = __shake128_squeezenblocks(state[u64 0:25], buf0); - - if (USE_AVX2_REJECTION == 1) { - ctr0, rr[2*MLKEM_VECN+2*MLKEM_N:MLKEM_N] = _rej_uniform_avx(rr[2*MLKEM_VECN+2*MLKEM_N:MLKEM_N], buf0); - } else { - ctr0, rr[2*MLKEM_VECN+2*MLKEM_N:MLKEM_N] = _rej_uniformn(rr[2*MLKEM_VECN+2*MLKEM_N:MLKEM_N], buf0); - } - - _, cf, _, _, zf = #CMP_64(ctr0, MLKEM_N - 1); - bflg = #SETcc(cf || zf); - - } else { - ctr0 = 0; - bflg = 1; - } - - while(bflg != 0) { - ctr0_s = ctr0; - state[u64 0:25], buf0[0:SHAKE128_RATE] = _shake128_squeezeblock(state[u64 0:25], buf0[0:SHAKE128_RATE]); - ctr0 = ctr0_s; - - ctr0, rr[2*MLKEM_VECN+2*MLKEM_N:MLKEM_N] = __rej_uniform(rr[2*MLKEM_VECN+2*MLKEM_N:MLKEM_N], ctr0, buf0[0:SHAKE128_RATE], SHAKE128_RATE); - - _, cf, _, _, zf = #CMP_64(ctr0, MLKEM_N - 1); - bflg = #SETcc(cf || zf); - } - - for i = 0 to MLKEM_K - { - for j = 0 to MLKEM_K - { - rr[i*MLKEM_VECN+j*MLKEM_N:MLKEM_N] = _nttunpack(rr[i*MLKEM_VECN+j*MLKEM_N:MLKEM_N]); - } - } - - return rr; -} diff --git a/code/jasmin/mlkem_avx2/indcpa.c b/code/jasmin/mlkem_avx2/indcpa.c deleted file mode 100644 index 9e2f9e35..00000000 --- a/code/jasmin/mlkem_avx2/indcpa.c +++ /dev/null @@ -1,320 +0,0 @@ -#include <stdint.h> -#include "indcpa.h" -#include "poly.h" -#include "polyvec.h" -#include "ntt.h" -#include "symmetric.h" - -/************************************************* -* Name: pack_pk -* -* Description: Serialize the public key as concatenation of the -* serialized vector of polynomials pk -* and the public seed used to generate the matrix A. -* -* Arguments: unsigned char *r: pointer to the output serialized public key -* const poly *pk: pointer to the input public-key polynomial -* const unsigned char *seed: pointer to the input public seed -**************************************************/ -static void pack_pk(unsigned char *r, polyvec *pk, const unsigned char *seed) -{ - int i; - polyvec_tobytes(r, pk); - for(i=0;i<MLKEM_SYMBYTES;i++) - r[i+MLKEM_POLYVECBYTES] = seed[i]; -} - -/************************************************* -* Name: unpack_pk -* -* Description: De-serialize public key from a byte array; -* approximate inverse of pack_pk -* -* Arguments: - polyvec *pk: pointer to output public-key vector of polynomials -* - unsigned char *seed: pointer to output seed to generate matrix A -* - const unsigned char *packedpk: pointer to input serialized public key -**************************************************/ -static void unpack_pk(polyvec *pk, unsigned char *seed, const unsigned char *packedpk) -{ - int i; - polyvec_frombytes(pk, packedpk); - for(i=0;i<MLKEM_SYMBYTES;i++) - seed[i] = packedpk[i+MLKEM_POLYVECBYTES]; -} - -/************************************************* -* Name: pack_sk -* -* Description: Serialize the secret key -* -* Arguments: - unsigned char *r: pointer to output serialized secret key -* - const polyvec *sk: pointer to input vector of polynomials (secret key) -**************************************************/ -static void pack_sk(unsigned char *r, polyvec *sk) -{ - polyvec_tobytes(r, sk); -} - -/************************************************* -* Name: unpack_sk -* -* Description: De-serialize the secret key; -* inverse of pack_sk -* -* Arguments: - polyvec *sk: pointer to output vector of polynomials (secret key) -* - const unsigned char *packedsk: pointer to input serialized secret key -**************************************************/ -static void unpack_sk(polyvec *sk, const unsigned char *packedsk) -{ - polyvec_frombytes(sk, packedsk); -} - -/************************************************* -* Name: pack_ciphertext -* -* Description: Serialize the ciphertext as concatenation of the -* compressed and serialized vector of polynomials b -* and the compressed and serialized polynomial v -* -* Arguments: unsigned char *r: pointer to the output serialized ciphertext -* const poly *pk: pointer to the input vector of polynomials b -* const unsigned char *seed: pointer to the input polynomial v -**************************************************/ -static void pack_ciphertext(unsigned char *r, polyvec *b, poly *v) -{ - polyvec_compress(r, b); - poly_compress(r+MLKEM_POLYVECCOMPRESSEDBYTES, v); -} - -/************************************************* -* Name: unpack_ciphertext -* -* Description: De-serialize and decompress ciphertext from a byte array; -* approximate inverse of pack_ciphertext -* -* Arguments: - polyvec *b: pointer to the output vector of polynomials b -* - poly *v: pointer to the output polynomial v -* - const unsigned char *c: pointer to the input serialized ciphertext -**************************************************/ -static void unpack_ciphertext(polyvec *b, poly *v, const unsigned char *c) -{ - polyvec_decompress(b, c); - poly_decompress(v, c+MLKEM_POLYVECCOMPRESSEDBYTES); -} - -/************************************************* -* Name: rej_uniform -* -* Description: Run rejection sampling on uniform random bytes to generate -* uniform random integers mod q -* -* Arguments: - int16_t *r: pointer to output buffer -* - unsigned int len: requested number of 16-bit integers (uniform mod q) -* - const unsigned char *buf: pointer to input buffer (assumed to be uniform random bytes) -* - unsigned int buflen: length of input buffer in bytes -* -* Returns number of sampled 16-bit integers (at most len) -**************************************************/ -static unsigned int rej_uniform(int16_t *r, unsigned int len, const unsigned char *buf, unsigned int buflen) -{ - unsigned int ctr, pos; - uint16_t val1, val2; - - ctr = pos = 0; - while(ctr < len && pos + 3 <= buflen) - { - val1 = buf[pos] | ((uint16_t)(buf[pos+1] & 0x0F) << 8); - pos ++; - val2 = ((buf[pos] >> 4) | ((uint16_t)buf[pos+1] << 4)); - pos += 2; - - if(val1 < MLKEM_Q) - { - r[ctr++] = (int16_t)val1; - } - - if(val2 < MLKEM_Q && ctr < len) { - r[ctr++] = (int16_t)val2; - } - } - - return ctr; -} - -#define gen_a(A,B) gen_matrix(A,B,0) -#define gen_at(A,B) gen_matrix(A,B,1) - -/************************************************* -* Name: gen_matrix -* -* Description: Deterministically generate matrix A (or the transpose of A) -* from a seed. Entries of the matrix are polynomials that look -* uniformly random. Performs rejection sampling on output of -* a XOF -* -* Arguments: - polyvec *a: pointer to ouptput matrix A -* - const unsigned char *seed: pointer to input seed -* - int transposed: boolean deciding whether A or A^T is generated -**************************************************/ -static void gen_matrix(polyvec *a, const unsigned char *seed, int transposed) // Not static for benchmarking -{ - unsigned int ctr, i, j; - const unsigned int maxnblocks=(530+XOF_BLOCKBYTES)/XOF_BLOCKBYTES; /* 530 is expected number of required bytes */ - unsigned char buf[XOF_BLOCKBYTES*maxnblocks+1]; - xof_state state; - - for(i=0;i<MLKEM_K;i++) - { - for(j=0;j<MLKEM_K;j++) - { - if(transposed) { - xof_absorb(&state, seed, i, j); - } - else { - xof_absorb(&state, seed, j, i); - } - - xof_squeezeblocks(buf, maxnblocks, &state); - ctr = rej_uniform(a[i].vec[j].coeffs, MLKEM_N, buf, maxnblocks*XOF_BLOCKBYTES); - - while(ctr < MLKEM_N) - { - xof_squeezeblocks(buf, 1, &state); - ctr += rej_uniform(a[i].vec[j].coeffs + ctr, MLKEM_N - ctr, buf, XOF_BLOCKBYTES); - } - } - } -} - -/************************************************* -* Name: indcpa_keypair -* -* Description: Generates public and private key for the CPA-secure -* public-key encryption scheme underlying Kyber -* -* Arguments: - unsigned char *pk: pointer to output public key (of length MLKEM_INDCPA_PUBLICKEYBYTES bytes) -* - unsigned char *sk: pointer to output private key (of length MLKEM_INDCPA_SECRETKEYBYTES bytes) -**************************************************/ -void indcpa_keypair(unsigned char *pk, - unsigned char *sk, - const unsigned char *randomness) -{ - polyvec a[MLKEM_K], e, pkpv, skpv; - unsigned char buf[2*MLKEM_SYMBYTES]; - unsigned char *publicseed = buf; - unsigned char *noiseseed = buf+MLKEM_SYMBYTES; - int i; - unsigned char nonce=0; - - //randombytes(buf, MLKEM_SYMBYTES); - for(i=0;i<MLKEM_SYMBYTES;i++) - buf[i] = randomness[i]; - - hash_g(buf, buf, MLKEM_SYMBYTES); - - gen_a(a, publicseed); - - for(i=0;i<MLKEM_K;i++) - poly_getnoise_eta1(skpv.vec+i, noiseseed, nonce++); - for(i=0;i<MLKEM_K;i++) - poly_getnoise_eta1(e.vec+i, noiseseed, nonce++); - - polyvec_ntt(&skpv); - polyvec_ntt(&e); - - // matrix-vector multiplication - for(i=0;i<MLKEM_K;i++) { - polyvec_pointwise_acc(&pkpv.vec[i], &a[i], &skpv); - poly_frommont(&pkpv.vec[i]); - } - - polyvec_add(&pkpv, &pkpv, &e); - polyvec_reduce(&pkpv); - - pack_sk(sk, &skpv); - pack_pk(pk, &pkpv, publicseed); -} - -/************************************************* -* Name: indcpa_enc -* -* Description: Encryption function of the CPA-secure -* public-key encryption scheme underlying Kyber. -* -* Arguments: - unsigned char *c: pointer to output ciphertext (of length MLKEM_INDCPA_BYTES bytes) -* - const unsigned char *m: pointer to input message (of length MLKEM_INDCPA_MSGBYTES bytes) -* - const unsigned char *pk: pointer to input public key (of length MLKEM_INDCPA_PUBLICKEYBYTES bytes) -* - const unsigned char *coin: pointer to input random coins used as seed (of length MLKEM_SYMBYTES bytes) -* to deterministically generate all randomness -**************************************************/ -void indcpa_enc(unsigned char *c, - const unsigned char *m, - const unsigned char *pk, - const unsigned char *coins) -{ - polyvec sp, pkpv, ep, at[MLKEM_K], bp; - poly v, k, epp; - unsigned char seed[MLKEM_SYMBYTES]; - int i; - unsigned char nonce=0; - - unpack_pk(&pkpv, seed, pk); - poly_frommsg(&k, m); - gen_at(at, seed); - - for(i=0;i<MLKEM_K;i++) - poly_getnoise_eta1(sp.vec+i, coins, nonce++); - for(i=0;i<MLKEM_K;i++) - poly_getnoise_eta1(ep.vec+i, coins, nonce++); - poly_getnoise_eta1(&epp, coins, nonce++); - - polyvec_ntt(&sp); - - // matrix-vector multiplication - for(i=0;i<MLKEM_K;i++) - polyvec_pointwise_acc(&bp.vec[i], &at[i], &sp); - - polyvec_pointwise_acc(&v, &pkpv, &sp); - - polyvec_invntt(&bp); - poly_invntt(&v); - - polyvec_add(&bp, &bp, &ep); - poly_add(&v, &v, &epp); - poly_add(&v, &v, &k); - polyvec_reduce(&bp); - poly_reduce(&v); - - pack_ciphertext(c, &bp, &v); -} - -/************************************************* -* Name: indcpa_dec -* -* Description: Decryption function of the CPA-secure -* public-key encryption scheme underlying Kyber. -* -* Arguments: - unsigned char *m: pointer to output decrypted message (of length MLKEM_INDCPA_MSGBYTES) -* - const unsigned char *c: pointer to input ciphertext (of length MLKEM_INDCPA_BYTES) -* - const unsigned char *sk: pointer to input secret key (of length MLKEM_INDCPA_SECRETKEYBYTES) -**************************************************/ -void indcpa_dec(unsigned char *m, - const unsigned char *c, - const unsigned char *sk) -{ - polyvec bp, skpv; - poly v, mp; - - unpack_ciphertext(&bp, &v, c); - unpack_sk(&skpv, sk); - - - polyvec_ntt(&bp); - polyvec_pointwise_acc(&mp, &skpv, &bp); - poly_invntt(&mp); - - poly_sub(&mp, &v, &mp); - poly_reduce(&mp); - - poly_tomsg(m, &mp); -} diff --git a/code/jasmin/mlkem_avx2/indcpa.h b/code/jasmin/mlkem_avx2/indcpa.h deleted file mode 100644 index f07fc16a..00000000 --- a/code/jasmin/mlkem_avx2/indcpa.h +++ /dev/null @@ -1,36 +0,0 @@ -#ifndef INDCPA_H -#define INDCPA_H - -#include <stdint.h> - -void indcpa_keypair(unsigned char *pk, - unsigned char *sk, - const unsigned char *randomness); - -void indcpa_enc(unsigned char *c, - const unsigned char *m, - const unsigned char *pk, - const unsigned char *coins); - -void indcpa_dec(unsigned char *m, - const unsigned char *c, - const unsigned char *sk); - - - - -void indcpa_keypair_jazz(unsigned char *pk, - unsigned char *sk, - const unsigned char *randomness); - -void indcpa_enc_jazz(unsigned char *c, - const unsigned char *m, - const unsigned char *pk, - const unsigned char *coins); - -void indcpa_dec_jazz(unsigned char *m, - const unsigned char *c, - const unsigned char *sk); - - -#endif diff --git a/code/jasmin/mlkem_avx2/kem.c b/code/jasmin/mlkem_avx2/kem.c deleted file mode 100644 index 9409e584..00000000 --- a/code/jasmin/mlkem_avx2/kem.c +++ /dev/null @@ -1,145 +0,0 @@ -#include <stdint.h> -#include <stddef.h> -#include <string.h> -#include "kem.h" -#include "indcpa.h" -#include "symmetric.h" - -/************************************************* -* Name: verify -* -* Description: Compare two arrays for equality in constant time. -* -* Arguments: const uint8_t *a: pointer to first byte array -* const uint8_t *b: pointer to second byte array -* size_t len: length of the byte arrays -* -* Returns 0 if the byte arrays are equal, 1 otherwise -**************************************************/ -uint64_t verify(const uint8_t *a, const uint8_t *b, size_t len) -{ - size_t i; - uint8_t r; - - r = 0; - for(i=0; i < len; i ++) - r |= a[i] ^ b[i]; - - return (-(uint64_t)r) >> 63; -} - -/************************************************* -* Name: cmov -* -* Description: Copy len bytes from x to r if b is 1; -* don't modify x if b is 0. Requires b to be in {0,1}; -* assumes two's complement representation of negative integers. -* Runs in constant time. -* -* Arguments: uint8_t *r: pointer to output byte array -* const uint8_t *x: pointer to input byte array -* size_t len: Amount of bytes to be copied -* uint8_t b: Condition bit; has to be in {0,1} -**************************************************/ -void cmov(uint8_t *r, const uint8_t *x, size_t len, uint8_t b) -{ - size_t i; - - b = -b; - for(i=0; i < len; i++) - r[i] ^= b & (r[i] ^ x[i]); -} - -/************************************************* -* Name: crypto_kem_keypair -* -* Description: Generates public and private key for the CCA-secure -* Kyber key encapsulation mechanism -* -* Arguments: - unsigned char *pk: pointer to output public key -* - unsigned char *sk: pointer to output private key -**************************************************/ -void crypto_kem_keypair(unsigned char *pk, - unsigned char *sk, - const unsigned char *randomness) -{ - indcpa_keypair(pk, sk, randomness); - - memcpy(sk+MLKEM_INDCPA_SECRETKEYBYTES, pk, MLKEM_INDCPA_PUBLICKEYBYTES); - - hash_h(sk+MLKEM_SECRETKEYBYTES-2*MLKEM_SYMBYTES, pk, MLKEM_PUBLICKEYBYTES); - - memcpy(sk+MLKEM_SECRETKEYBYTES-MLKEM_SYMBYTES, randomness + MLKEM_SYMBYTES, MLKEM_SYMBYTES); -} - -/************************************************* -* Name: crypto_kem_enc -* -* Description: Generates cipher text and shared -* secret for given public key -* -* Arguments: - unsigned char *c: pointer to output ciphertext (of length MLKEM_INDCPA_BYTES bytes) -* - const unsigned char *m: pointer to input message (of length MLKEM_INDCPA_MSGBYTES bytes) -* - const unsigned char *pk: pointer to input public key (of length MLKEM_INDCPA_PUBLICKEYBYTES bytes) -* - const unsigned char *coin: pointer to input random coins used as seed (of length MLKEM_SYMBYTES bytes) -* to deterministically generate all randomness -**************************************************/ -void crypto_kem_enc(unsigned char *ct, - unsigned char *ss, - const unsigned char *pk, - const unsigned char *coins) -{ - uint8_t buf[2*MLKEM_SYMBYTES]; - /* Will contain key, coins */ - uint8_t kr[2*MLKEM_SYMBYTES]; - - memcpy(buf, coins, MLKEM_SYMBYTES); - - /* Multitarget countermeasure for coins + contributory KEM */ - hash_h(buf+MLKEM_SYMBYTES, pk, MLKEM_PUBLICKEYBYTES); - hash_g(kr, buf, 2*MLKEM_SYMBYTES); - - /* coins are in kr+MLKEM_SYMBYTES */ - indcpa_enc(ct, buf, pk, kr+MLKEM_SYMBYTES); - - memcpy(ss,kr,MLKEM_SYMBYTES); -} - -/************************************************* -* Name: crypto_kem_dec -* -* Description: Generates shared secret for given -* cipher text and private key -* -* Arguments: - unsigned char *m: pointer to output decrypted message (of length MLKEM_INDCPA_MSGBYTES) -* - const unsigned char *c: pointer to input ciphertext (of length MLKEM_INDCPA_BYTES) -* - const unsigned char *sk: pointer to input secret key (of length MLKEM_INDCPA_SECRETKEYBYTES) -**************************************************/ -void crypto_kem_dec(uint8_t *ss, - const uint8_t *ct, - const uint8_t *sk) -{ - int fail; - uint8_t buf[2*MLKEM_SYMBYTES]; - /* Will contain key, coins */ - uint8_t kr[2*MLKEM_SYMBYTES]; - uint8_t cmp[MLKEM_CIPHERTEXTBYTES+MLKEM_SYMBYTES]; - const uint8_t *pk = sk+MLKEM_INDCPA_SECRETKEYBYTES; - - indcpa_dec(buf, ct, sk); - - /* Multitarget countermeasure for coins + contributory KEM */ - memcpy(buf+MLKEM_SYMBYTES, sk+MLKEM_SECRETKEYBYTES-2*MLKEM_SYMBYTES, MLKEM_SYMBYTES); - hash_g(kr, buf, 2*MLKEM_SYMBYTES); - - /* coins are in kr+MLKEM_SYMBYTES */ - indcpa_enc(cmp, buf, pk, kr+MLKEM_SYMBYTES); - - fail = verify(ct, cmp, MLKEM_CIPHERTEXTBYTES); - - /* Compute rejection key */ - rkprf(ss,sk+MLKEM_SECRETKEYBYTES-MLKEM_SYMBYTES,ct); - - /* Copy true key to return buffer if fail is false */ - cmov(ss,kr,MLKEM_SYMBYTES,!fail); -} diff --git a/code/jasmin/mlkem_avx2/kem.h b/code/jasmin/mlkem_avx2/kem.h deleted file mode 100644 index 130c142d..00000000 --- a/code/jasmin/mlkem_avx2/kem.h +++ /dev/null @@ -1,41 +0,0 @@ -#ifndef CRYPTO_KEM_H -#define CRYPTO_KEM_H - -#include <stdint.h> - -void crypto_kem_keypair(unsigned char *pk, - unsigned char *sk, - const unsigned char *randomness); - -void crypto_kem_enc(unsigned char *c, - unsigned char *m, - const unsigned char *pk, - const unsigned char *coins); - -void crypto_kem_dec(unsigned char *m, - const unsigned char *c, - const unsigned char *sk); - -void jade_kem_mlkem_mlkem768_amd64_avx2_keypair_derand(unsigned char *pk, - unsigned char *sk, - const unsigned char *randomness); - -void jade_kem_mlkem_mlkem768_amd64_avx2_enc_derand(unsigned char *c, - const unsigned char *m, - const unsigned char *pk, - const unsigned char *coins); - - -void jade_kem_mlkem_mlkem768_amd64_avx2_keypair(unsigned char *pk, - unsigned char *sk); - -void jade_kem_mlkem_mlkem768_amd64_avx2_enc(unsigned char *c, - const unsigned char *m, - const unsigned char *pk); - -void jade_kem_mlkem_mlkem768_amd64_avx2_dec(unsigned char *m, - const unsigned char *c, - const unsigned char *sk); - - -#endif diff --git a/code/jasmin/mlkem_avx2/ntt.S b/code/jasmin/mlkem_avx2/ntt.S deleted file mode 100644 index 30f7c210..00000000 --- a/code/jasmin/mlkem_avx2/ntt.S +++ /dev/null @@ -1,198 +0,0 @@ -#include "consts.h" -.include "shuffle.inc" -.include "fq.inc" - -# We break the dependency chains with the cost of slightly more additions. -# But they can be run in parallel to the multiplications on execution port 5 -# (multiplications only go to ports 0 and 1) -.macro butterfly2 rl0,rl1,rl2,rl3,rh0,rh1,rh2,rh3,x=3,y=2,zl0=15,zl1=15,zh0=1,zh1=1 -#mul -vpmullw %ymm\zl0,%ymm\rh0,%ymm12 -vpmulhw %ymm\zh0,%ymm\rh0,%ymm\x -vpmullw %ymm\zl0,%ymm\rh1,%ymm13 -vpmulhw %ymm\zh0,%ymm\rh1,%ymm\rh0 -vpmullw %ymm\zl1,%ymm\rh2,%ymm14 -vpmulhw %ymm\zh1,%ymm\rh2,%ymm\y -vpmullw %ymm\zl1,%ymm\rh3,%ymm15 -vpmulhw %ymm\zh1,%ymm\rh3,%ymm\rh2 - -#reduce -vpmulhw %ymm0,%ymm12,%ymm12 -vpmulhw %ymm0,%ymm13,%ymm13 -vpmulhw %ymm0,%ymm14,%ymm14 -vpmulhw %ymm0,%ymm15,%ymm15 - -vpsubw %ymm\rh0,%ymm\rl1,%ymm\rh1 -vpaddw %ymm\rh0,%ymm\rl1,%ymm\rl1 -vpsubw %ymm\x,%ymm\rl0,%ymm\rh0 -vpaddw %ymm\x,%ymm\rl0,%ymm\rl0 -vpsubw %ymm\rh2,%ymm\rl3,%ymm\rh3 -vpaddw %ymm\rh2,%ymm\rl3,%ymm\rl3 -vpsubw %ymm\y,%ymm\rl2,%ymm\rh2 -vpaddw %ymm\y,%ymm\rl2,%ymm\rl2 - -#update -vpaddw %ymm12,%ymm\rh0,%ymm\rh0 -vpsubw %ymm12,%ymm\rl0,%ymm\rl0 -vpaddw %ymm13,%ymm\rh1,%ymm\rh1 -vpsubw %ymm13,%ymm\rl1,%ymm\rl1 -vpaddw %ymm14,%ymm\rh2,%ymm\rh2 -vpsubw %ymm14,%ymm\rl2,%ymm\rl2 -vpaddw %ymm15,%ymm\rh3,%ymm\rh3 -vpsubw %ymm15,%ymm\rl3,%ymm\rl3 -.endm - -.text -ntt_level0_avx: -level0: -#zetas -vpbroadcastd (%rsi),%ymm15 -vpbroadcastd 4(%rsi),%ymm1 - -#load -vmovdqa (%rdi),%ymm4 -vmovdqa 32(%rdi),%ymm5 -vmovdqa 64(%rdi),%ymm6 -vmovdqa 96(%rdi),%ymm7 -vmovdqa 256(%rdi),%ymm8 -vmovdqa 288(%rdi),%ymm9 -vmovdqa 320(%rdi),%ymm10 -vmovdqa 352(%rdi),%ymm11 - -butterfly2 4,5,6,7,8,9,10,11 - -#store -vmovdqa %ymm4,(%rdi) -vmovdqa %ymm5,32(%rdi) -vmovdqa %ymm6,64(%rdi) -vmovdqa %ymm7,96(%rdi) -vmovdqa %ymm8,256(%rdi) -vmovdqa %ymm9,288(%rdi) -vmovdqa %ymm10,320(%rdi) -vmovdqa %ymm11,352(%rdi) - -ret - -ntt_levels1t6_avx: -level1: -#zetas -vpbroadcastd (%rsi),%ymm15 -vpbroadcastd 4(%rsi),%ymm1 - -#load -vmovdqa (%rdi),%ymm4 -vmovdqa 32(%rdi),%ymm5 -vmovdqa 64(%rdi),%ymm6 -vmovdqa 96(%rdi),%ymm7 -vmovdqa 128(%rdi),%ymm8 -vmovdqa 160(%rdi),%ymm9 -vmovdqa 192(%rdi),%ymm10 -vmovdqa 224(%rdi),%ymm11 - -butterfly2 4,5,6,7,8,9,10,11,3 - -level2: -#zetas -vmovdqu 8(%rsi),%ymm15 -vmovdqu 40(%rsi),%ymm1 - -shuffle8 4,8,3,8 -shuffle8 5,9,4,9 -shuffle8 6,10,5,10 -shuffle8 7,11,6,11 - -butterfly2 3,8,4,9,5,10,6,11,7 - -level3: -#zetas -vmovdqu 72(%rsi),%ymm15 -vmovdqu 104(%rsi),%ymm1 - -shuffle4 3,5,7,5 -shuffle4 8,10,3,10 -shuffle4 4,6,8,6 -shuffle4 9,11,4,11 - -butterfly2 7,5,3,10,8,6,4,11,9 - -level4: -#zetas -vmovdqu 136(%rsi),%ymm15 -vmovdqu 168(%rsi),%ymm1 - -shuffle2 7,8,9,8 -shuffle2 5,6,7,6 -shuffle2 3,4,5,4 -shuffle2 10,11,3,11 - -butterfly2 9,8,7,6,5,4,3,11,10 - -level5: -#zetas -vmovdqu 200(%rsi),%ymm15 -vmovdqu 232(%rsi),%ymm1 - -shuffle1 9,5,10,5 -shuffle1 8,4,9,4 -shuffle1 7,3,8,3 -shuffle1 6,11,7,11 - -butterfly2 10,5,9,4,8,3,7,11,6 - -level6: -#zetas -vmovdqu 264(%rsi),%ymm14 -vmovdqu 328(%rsi),%ymm15 -vmovdqu 296(%rsi),%ymm1 -vmovdqu 360(%rsi),%ymm2 - -butterfly2 10,5,8,3,9,4,7,11,6,1,14,15,1,2 - -vmovdqa _16XV*2(%rdx),%ymm1 -red16 10,12 -red16 5,13 -red16 9,14 -red16 4,15 -red16 8,2 -red16 3,6 -red16 7,12 -red16 11,13 - -#store -vmovdqa %ymm10,(%rdi) -vmovdqa %ymm5,32(%rdi) -vmovdqa %ymm9,64(%rdi) -vmovdqa %ymm4,96(%rdi) -vmovdqa %ymm8,128(%rdi) -vmovdqa %ymm3,160(%rdi) -vmovdqa %ymm7,192(%rdi) -vmovdqa %ymm11,224(%rdi) - -ret - -.global cdecl(nttl0_avx) -cdecl(nttl0_avx): -#consts -vmovdqa _16XQ*2(%rsi),%ymm0 -mov %rsi,%rdx -add $_ZETAS_EXP*2,%rsi -call ntt_level0_avx -add $128,%rdi -call ntt_level0_avx -ret - - -.global cdecl(nttl1t6_avx) -cdecl(nttl1t6_avx): -#consts -vmovdqa _16XQ*2(%rsi),%ymm0 -mov %rsi,%rdx -add $_ZETAS_EXP*2,%rsi - -add $8,%rsi -call ntt_levels1t6_avx -add $256,%rdi -add $392,%rsi -call ntt_levels1t6_avx - -ret diff --git a/code/jasmin/mlkem_avx2/ntt.c b/code/jasmin/mlkem_avx2/ntt.c deleted file mode 100644 index 7be9370e..00000000 --- a/code/jasmin/mlkem_avx2/ntt.c +++ /dev/null @@ -1,152 +0,0 @@ -#include <stdint.h> -#include "params.h" -#include "ntt.h" -#include "reduce.h" - -/* Code to generate zetas and zetas_inv used in the number-theoretic transform: - -#define MLKEM_ROOT_OF_UNITY 17 - -static const uint16_t tree[128] = { - 0, 64, 32, 96, 16, 80, 48, 112, 8, 72, 40, 104, 24, 88, 56, 120, - 4, 68, 36, 100, 20, 84, 52, 116, 12, 76, 44, 108, 28, 92, 60, 124, - 2, 66, 34, 98, 18, 82, 50, 114, 10, 74, 42, 106, 26, 90, 58, 122, - 6, 70, 38, 102, 22, 86, 54, 118, 14, 78, 46, 110, 30, 94, 62, 126, - 1, 65, 33, 97, 17, 81, 49, 113, 9, 73, 41, 105, 25, 89, 57, 121, - 5, 69, 37, 101, 21, 85, 53, 117, 13, 77, 45, 109, 29, 93, 61, 125, - 3, 67, 35, 99, 19, 83, 51, 115, 11, 75, 43, 107, 27, 91, 59, 123, - 7, 71, 39, 103, 23, 87, 55, 119, 15, 79, 47, 111, 31, 95, 63, 127}; - - -static int16_t fqmul(int16_t a, int16_t b) { - return montgomery_reduce((int32_t)a*b); -} - -void init_ntt() { - unsigned int i, j, k; - int16_t tmp[128]; - - tmp[0] = MONT; - for(i = 1; i < 128; ++i) - tmp[i] = fqmul(tmp[i-1], MLKEM_ROOT_OF_UNITY*MONT % MLKEM_Q); - - for(i = 0; i < 128; ++i) - zetas[i] = tmp[tree[i]]; - - k = 0; - for(i = 64; i >= 1; i >>= 1) - for(j = i; j < 2*i; ++j) - zetas_inv[k++] = -tmp[128 - tree[j]]; - - zetas_inv[127] = MONT * (MONT * (MLKEM_Q - 1) * ((MLKEM_Q - 1)/128) % MLKEM_Q) % MLKEM_Q; -} - -*/ -int16_t zetas[128] = { - 2285, 2571, 2970, 1812, 1493, 1422, 287, 202, 3158, 622, 1577, 182, 962, 2127, 1855, 1468, - 573, 2004, 264, 383, 2500, 1458, 1727, 3199, 2648, 1017, 732, 608, 1787, 411, 3124, 1758, - 1223, 652, 2777, 1015, 2036, 1491, 3047, 1785, 516, 3321, 3009, 2663, 1711, 2167, 126, 1469, - 2476, 3239, 3058, 830, 107, 1908, 3082, 2378, 2931, 961, 1821, 2604, 448, 2264, 677, 2054, - 2226, 430, 555, 843, 2078, 871, 1550, 105, 422, 587, 177, 3094, 3038, 2869, 1574, 1653, - 3083, 778, 1159, 3182, 2552, 1483, 2727, 1119, 1739, 644, 2457, 349, 418, 329, 3173, 3254, - 817, 1097, 603, 610, 1322, 2044, 1864, 384, 2114, 3193, 1218, 1994, 2455, 220, 2142, 1670, - 2144, 1799, 2051, 794, 1819, 2475, 2459, 478, 3221, 3021, 996, 991, 958, 1869, 1522, 1628}; - -int16_t zetas_inv[128] = { - 1701, 1807, 1460, 2371, 2338, 2333, 308, 108, 2851, 870, 854, 1510, 2535, 1278, 1530, 1185, - 1659, 1187, 3109, 874, 1335, 2111, 136, 1215, 2945, 1465, 1285, 2007, 2719, 2726, 2232, 2512, - 75, 156, 3000, 2911, 2980, 872, 2685, 1590, 2210, 602, 1846, 777, 147, 2170, 2551, 246, - 1676, 1755, 460, 291, 235, 3152, 2742, 2907, 3224, 1779, 2458, 1251, 2486, 2774, 2899, 1103, - 1275, 2652, 1065, 2881, 725, 1508, 2368, 398, 951, 247, 1421, 3222, 2499, 271, 90, 853, - 1860, 3203, 1162, 1618, 666, 320, 8, 2813, 1544, 282, 1838, 1293, 2314, 552, 2677, 2106, - 1571, 205, 2918, 1542, 2721, 2597, 2312, 681, 130, 1602, 1871, 829, 2946, 3065, 1325, 2756, - 1861, 1474, 1202, 2367, 3147, 1752, 2707, 171, 3127, 3042, 1907, 1836, 1517, 359, 758, 1441}; - - -/************************************************* -* Name: fqmul -* -* Description: Multiplication followed by Montgomery reduction -* -* Arguments: - int16_t a: first factor -* - int16_t b: second factor -* -* Returns 16-bit integer congruent to a*b*R^{-1} mod q -**************************************************/ -static int16_t fqmul(int16_t a, int16_t b) { - return montgomery_reduce((int32_t)a*b); -} - -/************************************************* -* Name: ntt -* -* Description: Inplace number-theoretic transform (NTT) in Rq -* input is in standard order, output is in bitreversed order -* -* Arguments: - int16_t r[256]: pointer to input/output vector of elements of Zq -**************************************************/ -void ntt(int16_t r[256]) { - unsigned int len, start, j, k; - int16_t t, zeta; - - k = 1; - for(len = 128; len >= 2; len >>= 1) { - for(start = 0; start < 256; start = j + len) { - zeta = zetas[k++]; - for(j = start; j < start + len; ++j) { - t = fqmul(zeta, r[j + len]); - r[j + len] = r[j] - t; - r[j] = r[j] + t; - } - } - } -} - -/************************************************* -* Name: invntt -* -* Description: Inplace inverse number-theoretic transform in Rq -* input is in bitreversed order, output is in standard order -* -* Arguments: - int16_t r[256]: pointer to input/output vector of elements of Zq -**************************************************/ -void invntt(int16_t r[256]) { - unsigned int start, len, j, k; - int16_t t, zeta; - - k = 0; - for(len = 2; len <= 128; len <<= 1) { - for(start = 0; start < 256; start = j + len) { - zeta = zetas_inv[k++]; - for(j = start; j < start + len; ++j) { - t = r[j]; - r[j] = barrett_reduce(t + r[j + len]); - r[j + len] = t - r[j + len]; - r[j + len] = fqmul(zeta, r[j + len]); - } - } - } - - for(j = 0; j < 256; ++j) - r[j] = fqmul(r[j], zetas_inv[127]); -} - -/************************************************* -* Name: basemul -* -* Description: Multiplication of polynomials in Zq[X]/((X^2-zeta)) -* used for multiplication of elements in Rq in NTT domain -* -* Arguments: - int16_t r[2]: pointer to the output polynomial -* - const int16_t a[2]: pointer to the first factor -* - const int16_t b[2]: pointer to the second factor -* - int16_t zeta: integer defining the reduction polynomial -**************************************************/ -void basemul(int16_t r[2], const int16_t a[2], const int16_t b[2], int16_t zeta) { - r[0] = fqmul(a[1], b[1]); - r[0] = fqmul(r[0], zeta); - r[0] += fqmul(a[0], b[0]); - - r[1] = fqmul(a[0], b[1]); - r[1] += fqmul(a[1], b[0]); -} diff --git a/code/jasmin/mlkem_avx2/ntt.h b/code/jasmin/mlkem_avx2/ntt.h deleted file mode 100644 index dd5498a2..00000000 --- a/code/jasmin/mlkem_avx2/ntt.h +++ /dev/null @@ -1,45 +0,0 @@ -#ifndef NTT_H -#define NTT_H - -#include <stdint.h> -#include "params.h" -#include "consts.h" - -extern int16_t zetas[128]; -extern int16_t zetas_inv[128]; - -void invntt(int16_t *poly); -void basemul(int16_t r[2], const int16_t a[2], const int16_t b[2], int16_t zeta); - -#define ntt_avx MLKEM_NAMESPACE(ntt_avx) -//void ntt_avx(int16_t *r, const uint16_t *qdata); -//void nttl0_avx(int16_t *r, const uint16_t *qdata); -//void nttl1t6_avx(int16_t *r, const uint16_t *qdata); -#define invntt_avx MLKEM_NAMESPACE(invntt_avx) -//void invntt_avx(int16_t *r, const uint16_t *qdata); - -#define nttpack_avx MLKEM_NAMESPACE(nttpack_avx) -void nttpack_avx(int16_t *r, const uint16_t *qdata); -#define nttunpack_avx MLKEM_NAMESPACE(nttunpack_avx) -void nttunpack_avx(int16_t *r, const uint16_t *qdata); - -#define basemul_avx MLKEM_NAMESPACE(basemul_avx) -void basemul_avx(int16_t *r, - const int16_t *a, - const int16_t *b, - const uint16_t *qdata); -#define basemul_acc_avx MLKEM_NAMESPACE(basemul_acc_avx) -void basemul_acc_avx(int16_t *r, - const int16_t *a, - const int16_t *b, - const uint16_t *qdata); - -#define ntttobytes_avx MLKEM_NAMESPACE(ntttobytes_avx) -void ntttobytes_avx(uint8_t *r, const int16_t *a, const uint16_t *qdata); -#define nttfrombytes_avx MLKEM_NAMESPACE(nttfrombytes_avx) -void nttfrombytes_avx(int16_t *r, const uint8_t *a, const uint16_t *qdata); - - -void ntt(int16_t *poly); - -#endif diff --git a/code/jasmin/mlkem_avx2/params.h b/code/jasmin/mlkem_avx2/params.h deleted file mode 100644 index ef12be20..00000000 --- a/code/jasmin/mlkem_avx2/params.h +++ /dev/null @@ -1,50 +0,0 @@ -#ifndef PARAMS_H -#define PARAMS_H - -#ifndef MLKEM_K -#define MLKEM_K 3 /* Change this for different security strengths */ -#endif - -#define MLKEM_NAMESPACE(s) s - -/* Don't change parameters below this line */ - -#define MLKEM_N 256 -#define MLKEM_Q 3329 - - - -#define MLKEM_SYMBYTES 32 /* size in bytes of hashes, and seeds */ -#define MLKEM_SSBYTES 32 /* size in bytes of shared key */ - -#define MLKEM_POLYBYTES 384 -#define MLKEM_POLYVECBYTES (MLKEM_K * MLKEM_POLYBYTES) - - -#if MLKEM_K == 2 -#define MLKEM_ETA1 3 -#define MLKEM_POLYCOMPRESSEDBYTES 128 -#define MLKEM_POLYVECCOMPRESSEDBYTES (MLKEM_K * 320) -#elif MLKEM_K == 3 -#define MLKEM_ETA1 2 -#define MLKEM_POLYCOMPRESSEDBYTES 128 -#define MLKEM_POLYVECCOMPRESSEDBYTES (MLKEM_K * 320) -#elif MLKEM_K == 4 -#define MLKEM_ETA1 2 -#define MLKEM_POLYCOMPRESSEDBYTES 160 -#define MLKEM_POLYVECCOMPRESSEDBYTES (MLKEM_K * 352) -#endif - -#define MLKEM_ETA2 2 - -#define MLKEM_INDCPA_MSGBYTES MLKEM_SYMBYTES -#define MLKEM_INDCPA_PUBLICKEYBYTES (MLKEM_POLYVECBYTES + MLKEM_SYMBYTES) -#define MLKEM_INDCPA_SECRETKEYBYTES (MLKEM_POLYVECBYTES) -#define MLKEM_INDCPA_BYTES (MLKEM_POLYVECCOMPRESSEDBYTES + MLKEM_POLYCOMPRESSEDBYTES) - -#define MLKEM_PUBLICKEYBYTES (MLKEM_INDCPA_PUBLICKEYBYTES) -#define MLKEM_SECRETKEYBYTES (MLKEM_INDCPA_SECRETKEYBYTES + MLKEM_INDCPA_PUBLICKEYBYTES + 2*MLKEM_SYMBYTES) /* 32 bytes of additional space to save H(pk) */ -#define MLKEM_CIPHERTEXTBYTES MLKEM_INDCPA_BYTES -#define MLKEM_SSBYTES 32 - -#endif diff --git a/code/jasmin/mlkem_avx2/poly.c b/code/jasmin/mlkem_avx2/poly.c deleted file mode 100644 index 02969ae8..00000000 --- a/code/jasmin/mlkem_avx2/poly.c +++ /dev/null @@ -1,378 +0,0 @@ -#include <stdint.h> -#include "params.h" -#include "poly.h" -#include "ntt.h" -#include "reduce.h" -#include "cbd.h" -#include "symmetric.h" - -/************************************************* -* Name: poly_compress -* -* Description: Compression and subsequent serialization of a polynomial -* -* Arguments: - unsigned char *r: pointer to output byte array (needs space for MLKEM_POLYCOMPRESSEDBYTES bytes) -* - const poly *a: pointer to input polynomial -**************************************************/ -void poly_compress(unsigned char *r, poly *a) -{ - uint8_t t[8]; - int i,j,k=0; - - poly_csubq(a); - -#if (MLKEM_POLYCOMPRESSEDBYTES == 96) - for(i=0;i<MLKEM_N;i+=8) - { - for(j=0;j<8;j++) - t[j] = ((((uint32_t)a->coeffs[i+j] << 3) + MLKEM_Q/2) / MLKEM_Q) & 7; - - r[k] = t[0] | (t[1] << 3) | (t[2] << 6); - r[k+1] = (t[2] >> 2) | (t[3] << 1) | (t[4] << 4) | (t[5] << 7); - r[k+2] = (t[5] >> 1) | (t[6] << 2) | (t[7] << 5); - k += 3; - } -#elif (MLKEM_POLYCOMPRESSEDBYTES == 128) - for(i=0;i<MLKEM_N;i+=8) - { - for(j=0;j<8;j++) - t[j] = ((((uint32_t)a->coeffs[i+j] << 4) + MLKEM_Q/2) / MLKEM_Q) & 15; - - r[k] = t[0] | (t[1] << 4); - r[k+1] = t[2] | (t[3] << 4); - r[k+2] = t[4] | (t[5] << 4); - r[k+3] = t[6] | (t[7] << 4); - k += 4; - } -#elif (MLKEM_POLYCOMPRESSEDBYTES == 160) - for(i=0;i<MLKEM_N;i+=8) - { - for(j=0;j<8;j++) - t[j] = ((((uint32_t)a->coeffs[i+j] << 5) + MLKEM_Q/2) / MLKEM_Q) & 31; - - r[k] = t[0] | (t[1] << 5); - r[k+1] = (t[1] >> 3) | (t[2] << 2) | (t[3] << 7); - r[k+2] = (t[3] >> 1) | (t[4] << 4); - r[k+3] = (t[4] >> 4) | (t[5] << 1) | (t[6] << 6); - r[k+4] = (t[6] >> 2) | (t[7] << 3); - k += 5; - } -#else -#error "MLKEM_POLYCOMPRESSEDBYTES needs to be in {96, 128, 160}" -#endif -} - -/************************************************* -* Name: poly_decompress -* -* Description: De-serialization and subsequent decompression of a polynomial; -* approximate inverse of poly_compress -* -* Arguments: - poly *r: pointer to output polynomial -* - const unsigned char *a: pointer to input byte array (of length MLKEM_POLYCOMPRESSEDBYTES bytes) -**************************************************/ -void poly_decompress(poly *r, const unsigned char *a) -{ - int i; -#if (MLKEM_POLYCOMPRESSEDBYTES == 96) - for(i=0;i<MLKEM_N;i+=8) - { - r->coeffs[i+0] = (((a[0] & 7) * MLKEM_Q) + 4) >> 3; - r->coeffs[i+1] = ((((a[0] >> 3) & 7) * MLKEM_Q) + 4) >> 3; - r->coeffs[i+2] = ((((a[0] >> 6) | ((a[1] << 2) & 4)) * MLKEM_Q) + 4) >> 3; - r->coeffs[i+3] = ((((a[1] >> 1) & 7) * MLKEM_Q) + 4) >> 3; - r->coeffs[i+4] = ((((a[1] >> 4) & 7) * MLKEM_Q) + 4) >> 3; - r->coeffs[i+5] = ((((a[1] >> 7) | ((a[2] << 1) & 6)) * MLKEM_Q) + 4) >> 3; - r->coeffs[i+6] = ((((a[2] >> 2) & 7) * MLKEM_Q) + 4) >> 3; - r->coeffs[i+7] = ((((a[2] >> 5)) * MLKEM_Q) + 4) >> 3; - a += 3; - } -#elif (MLKEM_POLYCOMPRESSEDBYTES == 128) - for(i=0;i<MLKEM_N;i+=8) - { - r->coeffs[i+0] = (((a[0] & 15) * MLKEM_Q) + 8) >> 4; - r->coeffs[i+1] = (((a[0] >> 4) * MLKEM_Q) + 8) >> 4; - r->coeffs[i+2] = (((a[1] & 15) * MLKEM_Q) + 8) >> 4; - r->coeffs[i+3] = (((a[1] >> 4) * MLKEM_Q) + 8) >> 4; - r->coeffs[i+4] = (((a[2] & 15) * MLKEM_Q) + 8) >> 4; - r->coeffs[i+5] = (((a[2] >> 4) * MLKEM_Q) + 8) >> 4; - r->coeffs[i+6] = (((a[3] & 15) * MLKEM_Q) + 8) >> 4; - r->coeffs[i+7] = (((a[3] >> 4) * MLKEM_Q) + 8) >> 4; - a += 4; - } -#elif (MLKEM_POLYCOMPRESSEDBYTES == 160) - for(i=0;i<MLKEM_N;i+=8) - { - r->coeffs[i+0] = (((a[0] & 31) * MLKEM_Q) + 16) >> 5; - r->coeffs[i+1] = ((((a[0] >> 5) | ((a[1] & 3) << 3)) * MLKEM_Q) + 16) >> 5; - r->coeffs[i+2] = ((((a[1] >> 2) & 31) * MLKEM_Q) + 16) >> 5; - r->coeffs[i+3] = ((((a[1] >> 7) | ((a[2] & 15) << 1)) * MLKEM_Q) + 16) >> 5; - r->coeffs[i+4] = ((((a[2] >> 4) | ((a[3] & 1) << 4)) * MLKEM_Q) + 16) >> 5; - r->coeffs[i+5] = ((((a[3] >> 1) & 31) * MLKEM_Q) + 16) >> 5; - r->coeffs[i+6] = ((((a[3] >> 6) | ((a[4] & 7) << 2)) * MLKEM_Q) + 16) >> 5; - r->coeffs[i+7] = (((a[4] >> 3) * MLKEM_Q) + 16) >> 5; - a += 5; - } -#else -#error "MLKEM_POLYCOMPRESSEDBYTES needs to be in {96, 128, 160}" -#endif -} - -/************************************************* -* Name: poly_tobytes -* -* Description: Serialization of a polynomial -* -* Arguments: - unsigned char *r: pointer to output byte array (needs space for MLKEM_POLYBYTES bytes) -* - const poly *a: pointer to input polynomial -**************************************************/ -void poly_tobytes(unsigned char *r, poly *a) -{ - int i; - uint16_t t0, t1; - - poly_csubq(a); - - for(i=0;i<MLKEM_N/2;i++){ - t0 = a->coeffs[2*i]; - t1 = a->coeffs[2*i+1]; - r[3*i] = t0 & 0xff; - r[3*i+1] = (t0 >> 8) | ((t1 & 0xf) << 4); - r[3*i+2] = t1 >> 4; - } -} - -/************************************************* -* Name: poly_frombytes -* -* Description: De-serialization of a polynomial; -* inverse of poly_tobytes -* -* Arguments: - poly *r: pointer to output polynomial -* - const unsigned char *a: pointer to input byte array (of MLKEM_POLYBYTES bytes) -**************************************************/ -void poly_frombytes(poly *r, const unsigned char *a) -{ - int i; - - for(i=0;i<MLKEM_N/2;i++){ - r->coeffs[2*i] = a[3*i] | ((uint16_t)a[3*i+1] & 0x0f) << 8; - r->coeffs[2*i+1] = a[3*i+1] >> 4 | ((uint16_t)a[3*i+2] & 0xff) << 4; - } -} - -/************************************************* -* Name: poly_getnoise_eta1 -* -* Description: Sample a polynomial deterministically from a seed and a nonce, -* with output polynomial close to centered binomial distribution -* with parameter MLKEM_ETA1 -* -* Arguments: - poly *r: pointer to output polynomial -* - const unsigned char *seed: pointer to input seed (pointing to array of length MLKEM_SYMBYTES bytes) -* - unsigned char nonce: one-byte input nonce -**************************************************/ -void poly_getnoise_eta1(poly *r, const unsigned char *seed, unsigned char nonce) -{ - uint8_t buf[MLKEM_ETA1*MLKEM_N/4]; - - prf(buf, sizeof(buf), seed, nonce); - poly_cbd_eta1(r, buf); -} - -/************************************************* -* Name: poly_getnoise_eta2 -* -* Description: Sample a polynomial deterministically from a seed and a nonce, -* with output polynomial close to centered binomial distribution -* with parameter MLKEM_ETA2 -* -* Arguments: - poly *r: pointer to output polynomial -* - const uint8_t *seed: pointer to input seed -* (of length MLKEM_SYMBYTES bytes) -* - uint8_t nonce: one-byte input nonce -**************************************************/ -void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES], uint8_t nonce) -{ - uint8_t buf[MLKEM_ETA2*MLKEM_N/4]; - prf(buf, sizeof(buf), seed, nonce); - poly_cbd_eta2(r, buf); -} - -/************************************************* -* Name: poly_ntt -* -* Description: Computes negacyclic number-theoretic transform (NTT) of -* a polynomial in place; -* inputs assumed to be in normal order, output in bitreversed order -* -* Arguments: - uint16_t *r: pointer to in/output polynomial -**************************************************/ -void poly_ntt(poly *r) -{ - ntt(r->coeffs); - poly_reduce(r); -} - -/************************************************* -* Name: poly_invntt -* -* Description: Computes inverse of negacyclic number-theoretic transform (NTT) of -* a polynomial in place; -* inputs assumed to be in bitreversed order, output in normal order -* -* Arguments: - uint16_t *a: pointer to in/output polynomial -**************************************************/ -void poly_invntt(poly *r) -{ - invntt(r->coeffs); -} - -/************************************************* -* Name: poly_basemul -* -* Description: Multiplication of two polynomials in NTT domain -* -* Arguments: - poly *r: pointer to output polynomial -* - const poly *a: pointer to first input polynomial -* - const poly *b: pointer to second input polynomial -**************************************************/ -void poly_basemul(poly *r, const poly *a, const poly *b) -{ - unsigned int i; - - for(i = 0; i < MLKEM_N/4; ++i) { - basemul(r->coeffs + 4*i, a->coeffs + 4*i, b->coeffs + 4*i, zetas[64 + i]); - basemul(r->coeffs + 4*i + 2, a->coeffs + 4*i + 2, b->coeffs + 4*i + 2, -zetas[64 + i]); - } -} - -/************************************************* -* Name: poly_frommont -* -* Description: Inplace conversion of all coefficients of a polynomial -* from Montgomery domain to normal domain -* -* Arguments: - poly *r: pointer to input/output polynomial -**************************************************/ -void poly_frommont(poly *r) -{ - int i; - const int16_t f = (1ULL << 32) % MLKEM_Q; - - for(i=0;i<MLKEM_N;i++) - r->coeffs[i] = montgomery_reduce((int32_t)r->coeffs[i]*f); -} - -/************************************************* -* Name: poly_reduce -* -* Description: Applies Barrett reduction to all coefficients of a polynomial -* for details of the Barrett reduction see comments in reduce.c -* -* Arguments: - poly *r: pointer to input/output polynomial -**************************************************/ -void poly_reduce(poly *r) -{ - int i; - - for(i=0;i<MLKEM_N;i++) - r->coeffs[i] = barrett_reduce(r->coeffs[i]); -} - -/************************************************* -* Name: poly_csubq -* -* Description: Applies conditional subtraction of q to each coefficient of a polynomial -* for details of conditional subtraction of q see comments in reduce.c -* -* Arguments: - poly *r: pointer to input/output polynomial -**************************************************/ -void poly_csubq(poly *r) -{ - int i; - - for(i=0;i<MLKEM_N;i++) - r->coeffs[i] = csubq(r->coeffs[i]); -} - -/************************************************* -* Name: poly_add -* -* Description: Add two polynomials -* -* Arguments: - poly *r: pointer to output polynomial -* - const poly *a: pointer to first input polynomial -* - const poly *b: pointer to second input polynomial -**************************************************/ -void poly_add(poly *r, const poly *a, const poly *b) -{ - int i; - for(i=0;i<MLKEM_N;i++) - r->coeffs[i] = a->coeffs[i] + b->coeffs[i]; -} - -/************************************************* -* Name: poly_sub -* -* Description: Subtract two polynomials -* -* Arguments: - poly *r: pointer to output polynomial -* - const poly *a: pointer to first input polynomial -* - const poly *b: pointer to second input polynomial -**************************************************/ -void poly_sub(poly *r, const poly *a, const poly *b) -{ - int i; - for(i=0;i<MLKEM_N;i++) - r->coeffs[i] = a->coeffs[i] - b->coeffs[i]; -} - -/************************************************* -* Name: poly_frommsg -* -* Description: Convert 32-byte message to polynomial -* -* Arguments: - poly *r: pointer to output polynomial -* - const unsigned char *msg: pointer to input message -**************************************************/ -void poly_frommsg(poly *r, const unsigned char msg[MLKEM_SYMBYTES]) -{ - int i,j; - uint16_t mask; - - for(i=0;i<MLKEM_SYMBYTES;i++) - { - for(j=0;j<8;j++) - { - mask = -((msg[i] >> j)&1); - r->coeffs[8*i+j] = mask & ((MLKEM_Q+1)/2); - } - } -} - -/************************************************* -* Name: poly_tomsg -* -* Description: Convert polynomial to 32-byte message -* -* Arguments: - unsigned char *msg: pointer to output message -* - const poly *a: pointer to input polynomial -**************************************************/ -void poly_tomsg(unsigned char msg[MLKEM_SYMBYTES], poly *a) -{ - uint16_t t; - int i,j; - - poly_csubq(a); - - for(i=0;i<MLKEM_SYMBYTES;i++) - { - msg[i] = 0; - for(j=0;j<8;j++) - { - t = (((a->coeffs[8*i+j] << 1) + MLKEM_Q/2) / MLKEM_Q) & 1; - msg[i] |= t << j; - } - } -} diff --git a/code/jasmin/mlkem_avx2/poly.h b/code/jasmin/mlkem_avx2/poly.h deleted file mode 100644 index a583f034..00000000 --- a/code/jasmin/mlkem_avx2/poly.h +++ /dev/null @@ -1,68 +0,0 @@ -#ifndef POLY_H -#define POLY_H - -#include <stdint.h> -#include "params.h" - -/* - * Elements of R_q = Z_q[X]/(X^n + 1). Represents polynomial - * coeffs[0] + X*coeffs[1] + X^2*xoeffs[2] + ... + X^{n-1}*coeffs[n-1] - */ -typedef struct{ - int16_t __attribute__((aligned(32))) coeffs[MLKEM_N]; -} poly; - -void poly_compress(unsigned char *r, poly *a); -void poly_decompress(poly *r, const unsigned char *a); - -void poly_tobytes(unsigned char *r, poly *a); -void poly_frombytes(poly *r, const unsigned char *a); - -void poly_frommsg(poly *r, const unsigned char msg[MLKEM_SYMBYTES]); -void poly_tomsg(unsigned char msg[MLKEM_SYMBYTES], poly *r); - -void poly_getnoise_eta1(poly *r,const unsigned char *seed, unsigned char nonce); -void poly_getnoise_eta2(poly *r,const unsigned char *seed, unsigned char nonce); - -void poly_ntt(poly *r); -void poly_invntt(poly *r); -void poly_basemul(poly *r, const poly *a, const poly *b); -void poly_frommont(poly *r); - -void poly_reduce(poly *r); -void poly_csubq(poly *r); - -void poly_add(poly *r, const poly *a, const poly *b); -void poly_sub(poly *r, const poly *a, const poly *b); - - - - - - -void poly_compress_jazz(unsigned char *r, poly *a); -void poly_decompress_jazz(poly *r, const unsigned char *a); - -void poly_tobytes_jazz(unsigned char *r, poly *a); -void poly_frombytes_jazz(poly *r, const unsigned char *a); - -void poly_frommsg_jazz(poly *r, const unsigned char msg[MLKEM_SYMBYTES]); -void poly_tomsg_jazz(unsigned char msg[MLKEM_SYMBYTES], poly *r); - - -void poly_getnoise_eta1_4x_jazz(poly *r,const unsigned char *seed, unsigned char nonce); -void poly_getnoise_eta1122_4x_jazz(poly *r,const unsigned char *seed, unsigned char nonce); - -void poly_ntt_jazz(poly *r); -void poly_invntt_jazz(poly *r); -void poly_basemul_jazz(poly *r, const poly *a, const poly *b); -void poly_frommont_jazz(poly *r); - -void poly_reduce_jazz(poly *r); -void poly_csubq_jazz(poly *r); - -void poly_add2_jazz(poly *r, const poly *b); -void poly_sub_jazz(poly *r, const poly *a, const poly *b); - - -#endif diff --git a/code/jasmin/mlkem_avx2/poly_ntt.c b/code/jasmin/mlkem_avx2/poly_ntt.c deleted file mode 100644 index 83341812..00000000 --- a/code/jasmin/mlkem_avx2/poly_ntt.c +++ /dev/null @@ -1,10 +0,0 @@ -#include "poly.h" -#include "ntt.h" - -void poly_ntt_jazz(poly *r, int16_t *zetas) -{ - ntt(r->coeffs); - poly_reduce(r); -} - - diff --git a/code/jasmin/mlkem_avx2/polyvec.c b/code/jasmin/mlkem_avx2/polyvec.c deleted file mode 100644 index 53835e84..00000000 --- a/code/jasmin/mlkem_avx2/polyvec.c +++ /dev/null @@ -1,237 +0,0 @@ -#include <stdint.h> -#include "polyvec.h" -#include "poly.h" - -/************************************************* -* Name: polyvec_compress -* -* Description: Compress and serialize vector of polynomials -* -* Arguments: - unsigned char *r: pointer to output byte array (needs space for MLKEM_POLYVECCOMPRESSEDBYTES) -* - const polyvec *a: pointer to input vector of polynomials -**************************************************/ -void polyvec_compress(unsigned char *r, polyvec *a) -{ - int i,j,k; - - polyvec_csubq(a); - -#if (MLKEM_POLYVECCOMPRESSEDBYTES == (MLKEM_K * 352)) - uint16_t t[8]; - for(i=0;i<MLKEM_K;i++) - { - for(j=0;j<MLKEM_N/8;j++) - { - for(k=0;k<8;k++) - t[k] = ((((uint32_t)a->vec[i].coeffs[8*j+k] << 11) + MLKEM_Q/2) / MLKEM_Q) & 0x7ff; - - r[11*j+ 0] = t[0] & 0xff; - r[11*j+ 1] = (t[0] >> 8) | ((t[1] & 0x1f) << 3); - r[11*j+ 2] = (t[1] >> 5) | ((t[2] & 0x03) << 6); - r[11*j+ 3] = (t[2] >> 2) & 0xff; - r[11*j+ 4] = (t[2] >> 10) | ((t[3] & 0x7f) << 1); - r[11*j+ 5] = (t[3] >> 7) | ((t[4] & 0x0f) << 4); - r[11*j+ 6] = (t[4] >> 4) | ((t[5] & 0x01) << 7); - r[11*j+ 7] = (t[5] >> 1) & 0xff; - r[11*j+ 8] = (t[5] >> 9) | ((t[6] & 0x3f) << 2); - r[11*j+ 9] = (t[6] >> 6) | ((t[7] & 0x07) << 5); - r[11*j+10] = (t[7] >> 3); - } - r += 352; - } -#elif (MLKEM_POLYVECCOMPRESSEDBYTES == (MLKEM_K * 320)) - uint16_t t[4]; - for(i=0;i<MLKEM_K;i++) - { - for(j=0;j<MLKEM_N/4;j++) - { - for(k=0;k<4;k++) - t[k] = ((((uint32_t)a->vec[i].coeffs[4*j+k] << 10) + MLKEM_Q/2) / MLKEM_Q) & 0x3ff; - - r[5*j+ 0] = t[0] & 0xff; - r[5*j+ 1] = (t[0] >> 8) | ((t[1] & 0x3f) << 2); - r[5*j+ 2] = (t[1] >> 6) | ((t[2] & 0x0f) << 4); - r[5*j+ 3] = (t[2] >> 4) | ((t[3] & 0x03) << 6); - r[5*j+ 4] = (t[3] >> 2); - } - r += 320; - } -#else -#error "MLKEM_POLYVECCOMPRESSEDBYTES needs to be in {320*MLKEM_K, 352*MLKEM_K}" -#endif -} - -/************************************************* -* Name: polyvec_decompress -* -* Description: De-serialize and decompress vector of polynomials; -* approximate inverse of polyvec_compress -* -* Arguments: - polyvec *r: pointer to output vector of polynomials -* - unsigned char *a: pointer to input byte array (of length MLKEM_POLYVECCOMPRESSEDBYTES) -**************************************************/ -void polyvec_decompress(polyvec *r, const unsigned char *a) -{ - int i,j; -#if (MLKEM_POLYVECCOMPRESSEDBYTES == (MLKEM_K * 352)) - for(i=0;i<MLKEM_K;i++) - { - for(j=0;j<MLKEM_N/8;j++) - { - r->vec[i].coeffs[8*j+0] = (((a[11*j+ 0] | (((uint32_t)a[11*j+ 1] & 0x07) << 8)) * MLKEM_Q) + 1024) >> 11; - r->vec[i].coeffs[8*j+1] = ((((a[11*j+ 1] >> 3) | (((uint32_t)a[11*j+ 2] & 0x3f) << 5)) * MLKEM_Q) + 1024) >> 11; - r->vec[i].coeffs[8*j+2] = ((((a[11*j+ 2] >> 6) | (((uint32_t)a[11*j+ 3] & 0xff) << 2) | (((uint32_t)a[11*j+ 4] & 0x01) << 10)) * MLKEM_Q) + 1024) >> 11; - r->vec[i].coeffs[8*j+3] = ((((a[11*j+ 4] >> 1) | (((uint32_t)a[11*j+ 5] & 0x0f) << 7)) * MLKEM_Q) + 1024) >> 11; - r->vec[i].coeffs[8*j+4] = ((((a[11*j+ 5] >> 4) | (((uint32_t)a[11*j+ 6] & 0x7f) << 4)) * MLKEM_Q) + 1024) >> 11; - r->vec[i].coeffs[8*j+5] = ((((a[11*j+ 6] >> 7) | (((uint32_t)a[11*j+ 7] & 0xff) << 1) | (((uint32_t)a[11*j+ 8] & 0x03) << 9)) * MLKEM_Q) + 1024) >> 11; - r->vec[i].coeffs[8*j+6] = ((((a[11*j+ 8] >> 2) | (((uint32_t)a[11*j+ 9] & 0x1f) << 6)) * MLKEM_Q) + 1024) >> 11; - r->vec[i].coeffs[8*j+7] = ((((a[11*j+ 9] >> 5) | (((uint32_t)a[11*j+10] & 0xff) << 3)) * MLKEM_Q) + 1024) >> 11; - } - a += 352; - } -#elif (MLKEM_POLYVECCOMPRESSEDBYTES == (MLKEM_K * 320)) - for(i=0;i<MLKEM_K;i++) - { - for(j=0;j<MLKEM_N/4;j++) - { - r->vec[i].coeffs[4*j+0] = (((a[5*j+ 0] | (((uint32_t)a[5*j+ 1] & 0x03) << 8)) * MLKEM_Q) + 512) >> 10; - r->vec[i].coeffs[4*j+1] = ((((a[5*j+ 1] >> 2) | (((uint32_t)a[5*j+ 2] & 0x0f) << 6)) * MLKEM_Q) + 512) >> 10; - r->vec[i].coeffs[4*j+2] = ((((a[5*j+ 2] >> 4) | (((uint32_t)a[5*j+ 3] & 0x3f) << 4)) * MLKEM_Q) + 512) >> 10; - r->vec[i].coeffs[4*j+3] = ((((a[5*j+ 3] >> 6) | (((uint32_t)a[5*j+ 4] & 0xff) << 2)) * MLKEM_Q) + 512) >> 10; - } - a += 320; - } -#else -#error "MLKEM_POLYVECCOMPRESSEDBYTES needs to be in {320*MLKEM_K, 352*MLKEM_K}" -#endif -} - -/************************************************* -* Name: polyvec_tobytes -* -* Description: Serialize vector of polynomials -* -* Arguments: - unsigned char *r: pointer to output byte array (needs space for MLKEM_POLYVECBYTES) -* - const polyvec *a: pointer to input vector of polynomials -**************************************************/ -void polyvec_tobytes(unsigned char *r, polyvec *a) -{ - int i; - for(i=0;i<MLKEM_K;i++) - poly_tobytes(r+i*MLKEM_POLYBYTES, &a->vec[i]); -} - -/************************************************* -* Name: polyvec_frombytes -* -* Description: De-serialize vector of polynomials; -* inverse of polyvec_tobytes -* -* Arguments: - unsigned char *r: pointer to output byte array -* - const polyvec *a: pointer to input vector of polynomials (of length MLKEM_POLYVECBYTES) -**************************************************/ -void polyvec_frombytes(polyvec *r, const unsigned char *a) -{ - int i; - for(i=0;i<MLKEM_K;i++) - poly_frombytes(&r->vec[i], a+i*MLKEM_POLYBYTES); -} - -/************************************************* -* Name: polyvec_ntt -* -* Description: Apply forward NTT to all elements of a vector of polynomials -* -* Arguments: - polyvec *r: pointer to in/output vector of polynomials -**************************************************/ -void polyvec_ntt(polyvec *r) -{ - int i; - for(i=0;i<MLKEM_K;i++) - poly_ntt(&r->vec[i]); -} - -/************************************************* -* Name: polyvec_invntt -* -* Description: Apply inverse NTT to all elements of a vector of polynomials -* -* Arguments: - polyvec *r: pointer to in/output vector of polynomials -**************************************************/ -void polyvec_invntt(polyvec *r) -{ - int i; - for(i=0;i<MLKEM_K;i++) - poly_invntt(&r->vec[i]); -} - -/************************************************* -* Name: polyvec_pointwise_acc -* -* Description: Pointwise multiply elements of a and b and accumulate into r -* -* Arguments: - poly *r: pointer to output polynomial -* - const polyvec *a: pointer to first input vector of polynomials -* - const polyvec *b: pointer to second input vector of polynomials -**************************************************/ -void polyvec_pointwise_acc(poly *r, const polyvec *a, const polyvec *b) -{ - int i; - poly t; - - poly_basemul(r, &a->vec[0], &b->vec[0]); - for(i=1;i<MLKEM_K;i++) { - poly_basemul(&t, &a->vec[i], &b->vec[i]); - poly_add(r, r, &t); - } - - poly_reduce(r); -} - -/************************************************* -* Name: polyvec_reduce -* -* Description: Applies Barrett reduction to each coefficient -* of each element of a vector of polynomials -* for details of the Barrett reduction see comments in reduce.c -* -* Arguments: - poly *r: pointer to input/output polynomial -**************************************************/ -void polyvec_reduce(polyvec *r) -{ - int i; - for(i=0;i<MLKEM_K;i++) - poly_reduce(&r->vec[i]); -} - -/************************************************* -* Name: polyvec_csubq -* -* Description: Applies conditional subtraction of q to each coefficient -* of each element of a vector of polynomials -* for details of conditional subtraction of q see comments in reduce.c -* -* Arguments: - poly *r: pointer to input/output polynomial -**************************************************/ -void polyvec_csubq(polyvec *r) -{ - int i; - for(i=0;i<MLKEM_K;i++) - poly_csubq(&r->vec[i]); -} - -/************************************************* -* Name: polyvec_add -* -* Description: Add vectors of polynomials -* -* Arguments: - polyvec *r: pointer to output vector of polynomials -* - const polyvec *a: pointer to first input vector of polynomials -* - const polyvec *b: pointer to second input vector of polynomials -**************************************************/ -void polyvec_add(polyvec *r, const polyvec *a, const polyvec *b) -{ - int i; - for(i=0;i<MLKEM_K;i++) - poly_add(&r->vec[i], &a->vec[i], &b->vec[i]); -} diff --git a/code/jasmin/mlkem_avx2/polyvec.h b/code/jasmin/mlkem_avx2/polyvec.h deleted file mode 100644 index 78aaa397..00000000 --- a/code/jasmin/mlkem_avx2/polyvec.h +++ /dev/null @@ -1,47 +0,0 @@ -#ifndef POLYVEC_H -#define POLYVEC_H - -#include "params.h" -#include "poly.h" - -typedef struct{ - poly vec[MLKEM_K]; -} polyvec; - -void polyvec_compress(unsigned char *r, polyvec *a); -void polyvec_decompress(polyvec *r, const unsigned char *a); - -void polyvec_tobytes(unsigned char *r, polyvec *a); -void polyvec_frombytes(polyvec *r, const unsigned char *a); - -void polyvec_ntt(polyvec *r); -void polyvec_invntt(polyvec *r); - -void polyvec_pointwise_acc(poly *r, const polyvec *a, const polyvec *b); - -void polyvec_reduce(polyvec *r); -void polyvec_csubq(polyvec *r); - -void polyvec_add(polyvec *r, const polyvec *a, const polyvec *b); - - - - -void polyvec_compress_jazz(unsigned char *r, polyvec *a); -void polyvec_decompress_jazz(polyvec *r, const unsigned char *a); - -void polyvec_tobytes_jazz(unsigned char *r, polyvec *a); -void polyvec_frombytes_jazz(polyvec *r, const unsigned char *a); - -void polyvec_ntt_jazz(polyvec *r); -void polyvec_invntt_jazz(polyvec *r); - -void polyvec_pointwise_acc_jazz(poly *r, const polyvec *a, const polyvec *b); - -void polyvec_reduce_jazz(polyvec *r); -void polyvec_csubq_jazz(polyvec *r); - -void polyvec_add2_jazz(polyvec *r, const polyvec *b); - - -#endif diff --git a/code/jasmin/mlkem_avx2/reduce.c b/code/jasmin/mlkem_avx2/reduce.c deleted file mode 100644 index 8d5a6997..00000000 --- a/code/jasmin/mlkem_avx2/reduce.c +++ /dev/null @@ -1,62 +0,0 @@ -#include <stdint.h> -#include "params.h" -#include "reduce.h" - -/************************************************* -* Name: montgomery_reduce -* -* Description: Montgomery reduction; given a 32-bit integer a, computes -* 16-bit integer congruent to a * R^-1 mod q, -* where R=2^16 -* -* Arguments: - int32_t a: input integer to be reduced; has to be in {-q2^15,...,q2^15-1} -* -* Returns: integer in {-q+1,...,q-1} congruent to a * R^-1 modulo q. -**************************************************/ -int16_t montgomery_reduce(int32_t a) -{ - int32_t t; - int16_t u; - -// printf("a: %d\n", a); - u = a * QINV; - t = (int32_t)u * MLKEM_Q; - t = a - t; - t >>= 16; - return t; -} - -/************************************************* -* Name: barrett_reduce -* -* Description: Barrett reduction; given a 16-bit integer a, computes -* 16-bit integer congruent to a mod q in {0,...,q} -* -* Arguments: - int16_t a: input integer to be reduced -* -* Returns: integer in {0,...,q} congruent to a modulo q. -**************************************************/ -int16_t barrett_reduce(int16_t a) { - int32_t t; - const int32_t v = (1U << 26)/MLKEM_Q + 1; - - t = v*a; - t >>= 26; - t *= MLKEM_Q; - return a - t; -} - -/************************************************* -* Name: csubq -* -* Description: Conditionallly subtract q -* -* Arguments: - int16_t x: input integer -* -* Returns: a - q if a >= q, else a -**************************************************/ -int16_t csubq(int16_t a) { - a -= MLKEM_Q; - a += (a >> 15) & MLKEM_Q; - return a; -} diff --git a/code/jasmin/mlkem_avx2/reduce.h b/code/jasmin/mlkem_avx2/reduce.h deleted file mode 100644 index 59ee6ef4..00000000 --- a/code/jasmin/mlkem_avx2/reduce.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef REDUCE_H -#define REDUCE_H - -#include <stdint.h> - -#define MONT 2285 // 2^16 % Q -#define QINV 62209 // q^(-1) mod 2^16 - -int16_t montgomery_reduce(int32_t a); - -int16_t barrett_reduce(int16_t a); - -int16_t csubq(int16_t x); - -#endif diff --git a/code/jasmin/mlkem_avx2/shuffle.S b/code/jasmin/mlkem_avx2/shuffle.S deleted file mode 100644 index 46b676a1..00000000 --- a/code/jasmin/mlkem_avx2/shuffle.S +++ /dev/null @@ -1,261 +0,0 @@ -#include "consts.h" -.include "fq.inc" -.include "shuffle.inc" - -nttpack128_avx: -#load -vmovdqa (%rdi),%ymm4 -vmovdqa 32(%rdi),%ymm5 -vmovdqa 64(%rdi),%ymm6 -vmovdqa 96(%rdi),%ymm7 -vmovdqa 128(%rdi),%ymm8 -vmovdqa 160(%rdi),%ymm9 -vmovdqa 192(%rdi),%ymm10 -vmovdqa 224(%rdi),%ymm11 - -shuffle1 4,5,3,5 -shuffle1 6,7,4,7 -shuffle1 8,9,6,9 -shuffle1 10,11,8,11 - -shuffle2 3,4,10,4 -shuffle2 6,8,3,8 -shuffle2 5,7,6,7 -shuffle2 9,11,5,11 - -shuffle4 10,3,9,3 -shuffle4 6,5,10,5 -shuffle4 4,8,6,8 -shuffle4 7,11,4,11 - -shuffle8 9,10,7,10 -shuffle8 6,4,9,4 -shuffle8 3,5,6,5 -shuffle8 8,11,3,11 - -#store -vmovdqa %ymm7,(%rdi) -vmovdqa %ymm9,32(%rdi) -vmovdqa %ymm6,64(%rdi) -vmovdqa %ymm3,96(%rdi) -vmovdqa %ymm10,128(%rdi) -vmovdqa %ymm4,160(%rdi) -vmovdqa %ymm5,192(%rdi) -vmovdqa %ymm11,224(%rdi) - -ret - -.text -nttunpack128_avx: -#load -vmovdqa (%rdi),%ymm4 -vmovdqa 32(%rdi),%ymm5 -vmovdqa 64(%rdi),%ymm6 -vmovdqa 96(%rdi),%ymm7 -vmovdqa 128(%rdi),%ymm8 -vmovdqa 160(%rdi),%ymm9 -vmovdqa 192(%rdi),%ymm10 -vmovdqa 224(%rdi),%ymm11 - -shuffle8 4,8,3,8 -shuffle8 5,9,4,9 -shuffle8 6,10,5,10 -shuffle8 7,11,6,11 - -shuffle4 3,5,7,5 -shuffle4 8,10,3,10 -shuffle4 4,6,8,6 -shuffle4 9,11,4,11 - -shuffle2 7,8,9,8 -shuffle2 5,6,7,6 -shuffle2 3,4,5,4 -shuffle2 10,11,3,11 - -shuffle1 9,5,10,5 -shuffle1 8,4,9,4 -shuffle1 7,3,8,3 -shuffle1 6,11,7,11 - -#store -vmovdqa %ymm10,(%rdi) -vmovdqa %ymm5,32(%rdi) -vmovdqa %ymm9,64(%rdi) -vmovdqa %ymm4,96(%rdi) -vmovdqa %ymm8,128(%rdi) -vmovdqa %ymm3,160(%rdi) -vmovdqa %ymm7,192(%rdi) -vmovdqa %ymm11,224(%rdi) - -ret - -.global cdecl(nttunpack_avx) -cdecl(nttunpack_avx): -call nttunpack128_avx -add $256,%rdi -call nttunpack128_avx -ret - -.global cdecl(nttpack_avx) -cdecl(nttpack_avx): -call nttpack128_avx -add $256,%rdi -call nttpack128_avx -ret - - -ntttobytes128_avx: -#load -vmovdqa (%rsi),%ymm5 -vmovdqa 32(%rsi),%ymm6 -vmovdqa 64(%rsi),%ymm7 -vmovdqa 96(%rsi),%ymm8 -vmovdqa 128(%rsi),%ymm9 -vmovdqa 160(%rsi),%ymm10 -vmovdqa 192(%rsi),%ymm11 -vmovdqa 224(%rsi),%ymm12 - -#csubq -csubq 5,13 -csubq 6,14 -csubq 7,15 -csubq 8,1 -csubq 9,13 -csubq 10,14 -csubq 11,15 -csubq 12,1 - -#bitpack -vpsllw $12,%ymm6,%ymm4 -vpor %ymm4,%ymm5,%ymm4 - -vpsrlw $4,%ymm6,%ymm5 -vpsllw $8,%ymm7,%ymm6 -vpor %ymm5,%ymm6,%ymm5 - -vpsrlw $8,%ymm7,%ymm6 -vpsllw $4,%ymm8,%ymm7 -vpor %ymm6,%ymm7,%ymm6 - -vpsllw $12,%ymm10,%ymm7 -vpor %ymm7,%ymm9,%ymm7 - -vpsrlw $4,%ymm10,%ymm8 -vpsllw $8,%ymm11,%ymm9 -vpor %ymm8,%ymm9,%ymm8 - -vpsrlw $8,%ymm11,%ymm9 -vpsllw $4,%ymm12,%ymm10 -vpor %ymm9,%ymm10,%ymm9 - -shuffle1 4,5,3,5 -shuffle1 6,7,4,7 -shuffle1 8,9,6,9 - -shuffle2 3,4,8,4 -shuffle2 6,5,3,5 -shuffle2 7,9,6,9 - -shuffle4 8,3,7,3 -shuffle4 6,4,8,4 -shuffle4 5,9,6,9 - -shuffle8 7,8,5,8 -shuffle8 6,3,7,3 -shuffle8 4,9,6,9 - -#store -vmovdqu %ymm5,(%rdi) -vmovdqu %ymm7,32(%rdi) -vmovdqu %ymm6,64(%rdi) -vmovdqu %ymm8,96(%rdi) -vmovdqu %ymm3,128(%rdi) -vmovdqu %ymm9,160(%rdi) - -ret - -.global cdecl(ntttobytes_avx) -cdecl(ntttobytes_avx): -#consts -vmovdqa _16XQ*2(%rdx),%ymm0 -call ntttobytes128_avx -add $256,%rsi -add $192,%rdi -call ntttobytes128_avx -ret - -nttfrombytes128_avx: -#load -vmovdqu (%rsi),%ymm4 -vmovdqu 32(%rsi),%ymm5 -vmovdqu 64(%rsi),%ymm6 -vmovdqu 96(%rsi),%ymm7 -vmovdqu 128(%rsi),%ymm8 -vmovdqu 160(%rsi),%ymm9 - -shuffle8 4,7,3,7 -shuffle8 5,8,4,8 -shuffle8 6,9,5,9 - -shuffle4 3,8,6,8 -shuffle4 7,5,3,5 -shuffle4 4,9,7,9 - -shuffle2 6,5,4,5 -shuffle2 8,7,6,7 -shuffle2 3,9,8,9 - -shuffle1 4,7,10,7 -shuffle1 5,8,4,8 -shuffle1 6,9,5,9 - -#bitunpack -vpsrlw $12,%ymm10,%ymm11 -vpsllw $4,%ymm7,%ymm12 -vpor %ymm11,%ymm12,%ymm11 -vpand %ymm0,%ymm10,%ymm10 -vpand %ymm0,%ymm11,%ymm11 - -vpsrlw $8,%ymm7,%ymm12 -vpsllw $8,%ymm4,%ymm13 -vpor %ymm12,%ymm13,%ymm12 -vpand %ymm0,%ymm12,%ymm12 - -vpsrlw $4,%ymm4,%ymm13 -vpand %ymm0,%ymm13,%ymm13 - -vpsrlw $12,%ymm8,%ymm14 -vpsllw $4,%ymm5,%ymm15 -vpor %ymm14,%ymm15,%ymm14 -vpand %ymm0,%ymm8,%ymm8 -vpand %ymm0,%ymm14,%ymm14 - -vpsrlw $8,%ymm5,%ymm15 -vpsllw $8,%ymm9,%ymm1 -vpor %ymm15,%ymm1,%ymm15 -vpand %ymm0,%ymm15,%ymm15 - -vpsrlw $4,%ymm9,%ymm1 -vpand %ymm0,%ymm1,%ymm1 - -#store -vmovdqa %ymm10,(%rdi) -vmovdqa %ymm11,32(%rdi) -vmovdqa %ymm12,64(%rdi) -vmovdqa %ymm13,96(%rdi) -vmovdqa %ymm8,128(%rdi) -vmovdqa %ymm14,160(%rdi) -vmovdqa %ymm15,192(%rdi) -vmovdqa %ymm1,224(%rdi) - -ret - -.global cdecl(nttfrombytes_avx) -cdecl(nttfrombytes_avx): -#consts -vmovdqa _16XMASK*2(%rdx),%ymm0 -call nttfrombytes128_avx -add $256,%rdi -add $192,%rsi -call nttfrombytes128_avx -ret diff --git a/code/jasmin/mlkem_avx2/speed.h b/code/jasmin/mlkem_avx2/speed.h deleted file mode 100644 index 070b30ac..00000000 --- a/code/jasmin/mlkem_avx2/speed.h +++ /dev/null @@ -1,62 +0,0 @@ -#ifndef SPEED_H -#define SPEED_H - -#include<stdint.h> -#include "params.h" - -typedef struct{ - int16_t __attribute__((aligned(32))) coeffs[MLKEM_N]; -} poly; - -typedef struct{ - poly vec[MLKEM_K]; -} polyvec; - -void gen_matrix_jazz(polyvec *a, unsigned char *seed); - -/*Poly functions*/ -void poly_compress_jazz(unsigned char *r, poly *a); -void poly_decompress_jazz(poly *r, const unsigned char *a); - -void poly_frommsg_jazz(poly *r, const unsigned char msg[MLKEM_SYMBYTES]); -void poly_tomsg_jazz(unsigned char msg[MLKEM_SYMBYTES], poly *r); - -void poly_getnoise_jazz(poly *r,const unsigned char *seed, unsigned char nonce); -void poly_getnoise_4x_jazz(poly *r0, poly *r1, poly *r2, poly *r3,const unsigned char *seed, unsigned char nonce); - -void poly_ntt_jazz(poly *r); -void poly_invntt_jazz(poly *r); - -/*Polyvec functions*/ -void polyvec_compress_jazz(unsigned char *r, polyvec *a); -void polyvec_decompress_jazz(polyvec *r, const unsigned char *a); - -void polyvec_pointwise_acc_jazz(poly *r, const polyvec *a, const polyvec *b); - -/* Indcpa functions*/ -void indcpa_keypair_jazz(unsigned char *pk, - unsigned char *sk, - const unsigned char *randomness); - -void indcpa_enc_jazz(unsigned char *c, - const unsigned char *m, - const unsigned char *pk, - const unsigned char *coins); - -void indcpa_dec_jazz(unsigned char *m, - const unsigned char *c, - const unsigned char *sk); - -/* KEM functions */ -void crypto_kem_keypair_jazz(unsigned char *pk, - unsigned char *sk, - const unsigned char *randomness); - -void crypto_kem_enc_jazz(unsigned char *c, - const unsigned char *m, - const unsigned char *pk, - const unsigned char *coins); -void crypto_kem_dec_jazz(unsigned char *m, - const unsigned char *c, - const unsigned char *sk); -#endif diff --git a/code/jasmin/mlkem_avx2/symmetric-fips202.c b/code/jasmin/mlkem_avx2/symmetric-fips202.c deleted file mode 100644 index b8f64a41..00000000 --- a/code/jasmin/mlkem_avx2/symmetric-fips202.c +++ /dev/null @@ -1,77 +0,0 @@ -#include <stdlib.h> -#include "symmetric.h" -#include "fips202.h" - -/************************************************* -* Name: mlkem_shake128_absorb -* -* Description: Absorb step of the SHAKE128 specialized for the Kyber context. -* -* Arguments: - uint64_t *s: pointer to (uninitialized) output Keccak state -* - const unsigned char *input: pointer to MLKEM_SYMBYTES input to be absorbed into s -* - unsigned char i additional byte of input -* - unsigned char j additional byte of input -**************************************************/ -void mlkem_shake128_absorb(keccak_state *s, const unsigned char *input, unsigned char x, unsigned char y) -{ - unsigned char extseed[MLKEM_SYMBYTES+2]; - int i; - - for(i=0;i<MLKEM_SYMBYTES;i++) - extseed[i] = input[i]; - extseed[i++] = x; - extseed[i] = y; - shake128_absorb(s->s, extseed, MLKEM_SYMBYTES+2); -} - -/************************************************* -* Name: mlkem_shake128_squeezeblocks -* -* Description: Squeeze step of SHAKE128 XOF. Squeezes full blocks of SHAKE128_RATE bytes each. -* Modifies the state. Can be called multiple times to keep squeezing, -* i.e., is incremental. -* -* Arguments: - unsigned char *output: pointer to output blocks -* - unsigned long long nblocks: number of blocks to be squeezed (written to output) -* - keccak_state *s: pointer to in/output Keccak state -**************************************************/ -void mlkem_shake128_squeezeblocks(unsigned char *output, unsigned long long nblocks, keccak_state *s) -{ - shake128_squeezeblocks(output, nblocks, s->s); -} - -/************************************************* -* Name: shake256_prf -* -* Description: Usage of SHAKE256 as a PRF, concatenates secret and public input -* and then generates outlen bytes of SHAKE256 output -* -* Arguments: - unsigned char *output: pointer to output -* - unsigned long long outlen: number of requested output bytes -* - const unsigned char * key: pointer to the key (of length MLKEM_SYMBYTES) -* - const unsigned char nonce: single-byte nonce (public PRF input) -**************************************************/ -void shake256_prf(unsigned char *output, unsigned long long outlen, const unsigned char *key, const unsigned char nonce) -{ - unsigned char extkey[MLKEM_SYMBYTES+1]; - size_t i; - - for(i=0;i<MLKEM_SYMBYTES;i++) - extkey[i] = key[i]; - extkey[i] = nonce; - - shake256(output, outlen, extkey, MLKEM_SYMBYTES+1); -} - -void shake256_rkprf(unsigned char *out, const unsigned char *key, const unsigned char *input) -{ - unsigned char extkey[MLKEM_SYMBYTES+MLKEM_CIPHERTEXTBYTES]; - size_t i; - - for(i=0;i<MLKEM_SYMBYTES;i++) - extkey[i] = key[i]; - for(i=0;i<MLKEM_CIPHERTEXTBYTES;i++) - extkey[i+MLKEM_SYMBYTES] = input[i]; - - shake256(out, MLKEM_SYMBYTES, extkey, MLKEM_SYMBYTES+MLKEM_CIPHERTEXTBYTES); -} \ No newline at end of file diff --git a/code/jasmin/mlkem_avx2/symmetric.h b/code/jasmin/mlkem_avx2/symmetric.h deleted file mode 100644 index b609d9bb..00000000 --- a/code/jasmin/mlkem_avx2/symmetric.h +++ /dev/null @@ -1,52 +0,0 @@ -#ifndef SYMMETRIC_H -#define SYMMETRIC_H - -#include "params.h" - -#ifdef MLKEM_90S - -#include "aes256ctr.h" -#include "sha2.h" - -#if (MLKEM_SSBYTES != 32) -#error "90s variant of Kyber can only generate keys of length 256 bits" -#endif - -#define hash_h(OUT, IN, INBYTES) sha256(OUT, IN, INBYTES) -#define hash_g(OUT, IN, INBYTES) sha512(OUT, IN, INBYTES) -#define xof_absorb(STATE, IN, X, Y) aes256xof_absorb(STATE, IN, X, Y) -#define xof_squeezeblocks(OUT, OUTBLOCKS, STATE) aes256xof_squeezeblocks(OUT, OUTBLOCKS, STATE) -#define prf(OUT, OUTBYTES, KEY, NONCE) aes256_prf(OUT, OUTBYTES, KEY, NONCE) -#define kdf(OUT, IN, INBYTES) sha256(OUT, IN, INBYTES) - -#define XOF_BLOCKBYTES 64 - -typedef aes256xof_ctx xof_state; - -#else - -#include "fips202.h" - -typedef struct { - uint64_t s[25]; -} keccak_state; - -void mlkem_shake128_absorb(keccak_state *s, const unsigned char *input, unsigned char x, unsigned char y); -void mlkem_shake128_squeezeblocks(unsigned char *output, unsigned long long nblocks, keccak_state *s); -void shake256_prf(unsigned char *output, unsigned long long outlen, const unsigned char *key, const unsigned char nonce); -void shake256_rkprf(unsigned char *out, const unsigned char *key, const unsigned char *input); - -#define hash_h(OUT, IN, INBYTES) sha3_256(OUT, IN, INBYTES) -#define hash_g(OUT, IN, INBYTES) sha3_512(OUT, IN, INBYTES) -#define xof_absorb(STATE, IN, X, Y) mlkem_shake128_absorb(STATE, IN, X, Y) -#define xof_squeezeblocks(OUT, OUTBLOCKS, STATE) mlkem_shake128_squeezeblocks(OUT, OUTBLOCKS, STATE) -#define prf(OUT, OUTBYTES, KEY, NONCE) shake256_prf(OUT, OUTBYTES, KEY, NONCE) -#define rkprf(OUT, KEY, INPUT) shake256_rkprf(OUT, KEY, INPUT) - -#define XOF_BLOCKBYTES 168 - -typedef keccak_state xof_state; - -#endif /* MLKEM_90S */ - -#endif /* SYMMETRIC_H */ diff --git a/code/jasmin/mlkem_ref/Makefile b/code/jasmin/mlkem_ref/Makefile index 029ea8fb..618778c6 100644 --- a/code/jasmin/mlkem_ref/Makefile +++ b/code/jasmin/mlkem_ref/Makefile @@ -2,6 +2,7 @@ -include ../../Makefile.conf +CKP := ../../kyber/ref CC ?= /usr/bin/gcc CFLAGS := -Wall -Wextra -g -O3 -fomit-frame-pointer JFLAGS := ${JADDFLAGS} @@ -42,7 +43,7 @@ test: test/test_poly_compress \ speed: test/speed_indcpa -HEADERS = params.h poly.h fips202.h kem.h +HEADERS = $(CKP)/params.h $(CKP)/poly.h $(CKP)/fips202.h $(CKP)/kem.h JHEADERS = params.jinc \ @@ -57,7 +58,7 @@ POLYHEADERS = poly.jinc \ POLYVECHEADERS = polyvec.jinc \ gen_matrix.jinc \ -SOURCES = poly.c polyvec.c cbd.c fips202.c ntt.c reduce.c symmetric-fips202.c indcpa.c kem.c \ +SOURCES = $(CKP)/poly.c $(CKP)/polyvec.c $(CKP)/cbd.c $(CKP)/fips202.c $(CKP)/ntt.c $(CKP)/reduce.c $(CKP)/symmetric-shake.c $(CKP)/indcpa.c $(CKP)/kem.c \ test/test_indcpa: test/test_indcpa.c $(HEADERS) $(SOURCES) jindcpa.o $(CC) $(CFLAGS) -o $@ $(SOURCES) jindcpa.o $< diff --git a/code/jasmin/mlkem_ref/cbd.c b/code/jasmin/mlkem_ref/cbd.c deleted file mode 100644 index 8e6580ad..00000000 --- a/code/jasmin/mlkem_ref/cbd.c +++ /dev/null @@ -1,58 +0,0 @@ -#include <stdint.h> -#include "params.h" -#include "cbd.h" - -/************************************************* -* Name: load32_littleendian -* -* Description: load bytes into a 32-bit integer -* in little-endian order -* -* Arguments: - const unsigned char *x: pointer to input byte array -* -* Returns 32-bit unsigned integer loaded from x -**************************************************/ -static uint32_t load32_littleendian(const unsigned char *x) -{ - uint32_t r; - r = (uint32_t)x[0]; - r |= (uint32_t)x[1] << 8; - r |= (uint32_t)x[2] << 16; - r |= (uint32_t)x[3] << 24; - return r; -} - -/************************************************* -* Name: cbd -* -* Description: Given an array of uniformly random bytes, compute -* polynomial with coefficients distributed according to -* a centered binomial distribution with parameter MLKEM_ETA -* -* Arguments: - poly *r: pointer to output polynomial -* - const unsigned char *buf: pointer to input byte array -**************************************************/ -void cbd(poly *r, const unsigned char *buf) -{ -#if MLKEM_ETA == 2 - uint32_t d,t; - int16_t a,b; - int i,j; - - for(i=0;i<MLKEM_N/8;i++) - { - t = load32_littleendian(buf+4*i); - d = t & 0x55555555; - d += (t>>1) & 0x55555555; - - for(j=0;j<8;j++) - { - a = (d >> 4*j) & 0x3; - b = (d >> (4*j+2)) & 0x3; - r->coeffs[8*i+j] = a - b; - } - } -#else -#error "poly_getnoise in poly.c only supports eta=2" -#endif -} diff --git a/code/jasmin/mlkem_ref/cbd.h b/code/jasmin/mlkem_ref/cbd.h deleted file mode 100644 index e3dbe040..00000000 --- a/code/jasmin/mlkem_ref/cbd.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef CBD_H -#define CBD_H - -#include "poly.h" - -void cbd(poly *r, const unsigned char *buf); - -#endif diff --git a/code/jasmin/mlkem_ref/extraction/jkem.ec b/code/jasmin/mlkem_ref/extraction/jkem.ec index 7a12a75c..36f48abc 100644 --- a/code/jasmin/mlkem_ref/extraction/jkem.ec +++ b/code/jasmin/mlkem_ref/extraction/jkem.ec @@ -142,7 +142,6 @@ module M(SC:Syscall_t) = { t <- (t * (W32.of_int 20159)); t <- (t `|>>` (W8.of_int 26)); t <- (t * (W32.of_int 3329)); - r <- (truncateu16 t); r <- a; r <- (r - (truncateu16 t)); return r; diff --git a/code/jasmin/mlkem_ref/fips202.c b/code/jasmin/mlkem_ref/fips202.c deleted file mode 100644 index d300328b..00000000 --- a/code/jasmin/mlkem_ref/fips202.c +++ /dev/null @@ -1,549 +0,0 @@ -/* Based on the public domain implementation in - * crypto_hash/keccakc512/simple/ from http://bench.cr.yp.to/supercop.html - * by Ronny Van Keer - * and the public domain "TweetFips202" implementation - * from https://twitter.com/tweetfips202 - * by Gilles Van Assche, Daniel J. Bernstein, and Peter Schwabe */ - -#include <stdint.h> -#include <assert.h> -#include "fips202.h" - -#define NROUNDS 24 -#define ROL(a, offset) ((a << offset) ^ (a >> (64-offset))) - -/************************************************* -* Name: load64 -* -* Description: Load 8 bytes into uint64_t in little-endian order -* -* Arguments: - const unsigned char *x: pointer to input byte array -* -* Returns the loaded 64-bit unsigned integer -**************************************************/ -static uint64_t load64(const unsigned char *x) -{ - unsigned long long r = 0, i; - - for (i = 0; i < 8; ++i) { - r |= (unsigned long long)x[i] << 8 * i; - } - return r; -} - -/************************************************* -* Name: store64 -* -* Description: Store a 64-bit integer to a byte array in little-endian order -* -* Arguments: - uint8_t *x: pointer to the output byte array -* - uint64_t u: input 64-bit unsigned integer -**************************************************/ -static void store64(uint8_t *x, uint64_t u) -{ - unsigned int i; - - for(i=0; i<8; ++i) { - x[i] = u; - u >>= 8; - } -} - -/* Keccak round constants */ -static const uint64_t KeccakF_RoundConstants[NROUNDS] = -{ - (uint64_t)0x0000000000000001ULL, - (uint64_t)0x0000000000008082ULL, - (uint64_t)0x800000000000808aULL, - (uint64_t)0x8000000080008000ULL, - (uint64_t)0x000000000000808bULL, - (uint64_t)0x0000000080000001ULL, - (uint64_t)0x8000000080008081ULL, - (uint64_t)0x8000000000008009ULL, - (uint64_t)0x000000000000008aULL, - (uint64_t)0x0000000000000088ULL, - (uint64_t)0x0000000080008009ULL, - (uint64_t)0x000000008000000aULL, - (uint64_t)0x000000008000808bULL, - (uint64_t)0x800000000000008bULL, - (uint64_t)0x8000000000008089ULL, - (uint64_t)0x8000000000008003ULL, - (uint64_t)0x8000000000008002ULL, - (uint64_t)0x8000000000000080ULL, - (uint64_t)0x000000000000800aULL, - (uint64_t)0x800000008000000aULL, - (uint64_t)0x8000000080008081ULL, - (uint64_t)0x8000000000008080ULL, - (uint64_t)0x0000000080000001ULL, - (uint64_t)0x8000000080008008ULL -}; - -/************************************************* -* Name: KeccakF1600_StatePermute -* -* Description: The Keccak F1600 Permutation -* -* Arguments: - uint64_t * state: pointer to in/output Keccak state -**************************************************/ -static void KeccakF1600_StatePermute(uint64_t * state) -{ - int round; - - uint64_t Aba, Abe, Abi, Abo, Abu; - uint64_t Aga, Age, Agi, Ago, Agu; - uint64_t Aka, Ake, Aki, Ako, Aku; - uint64_t Ama, Ame, Ami, Amo, Amu; - uint64_t Asa, Ase, Asi, Aso, Asu; - uint64_t BCa, BCe, BCi, BCo, BCu; - uint64_t Da, De, Di, Do, Du; - uint64_t Eba, Ebe, Ebi, Ebo, Ebu; - uint64_t Ega, Ege, Egi, Ego, Egu; - uint64_t Eka, Eke, Eki, Eko, Eku; - uint64_t Ema, Eme, Emi, Emo, Emu; - uint64_t Esa, Ese, Esi, Eso, Esu; - - //copyFromState(A, state) - Aba = state[ 0]; - Abe = state[ 1]; - Abi = state[ 2]; - Abo = state[ 3]; - Abu = state[ 4]; - Aga = state[ 5]; - Age = state[ 6]; - Agi = state[ 7]; - Ago = state[ 8]; - Agu = state[ 9]; - Aka = state[10]; - Ake = state[11]; - Aki = state[12]; - Ako = state[13]; - Aku = state[14]; - Ama = state[15]; - Ame = state[16]; - Ami = state[17]; - Amo = state[18]; - Amu = state[19]; - Asa = state[20]; - Ase = state[21]; - Asi = state[22]; - Aso = state[23]; - Asu = state[24]; - - for( round = 0; round < NROUNDS; round += 2 ) - { - // prepareTheta - BCa = Aba^Aga^Aka^Ama^Asa; - BCe = Abe^Age^Ake^Ame^Ase; - BCi = Abi^Agi^Aki^Ami^Asi; - BCo = Abo^Ago^Ako^Amo^Aso; - BCu = Abu^Agu^Aku^Amu^Asu; - - //thetaRhoPiChiIotaPrepareTheta(round , A, E) - Da = BCu^ROL(BCe, 1); - De = BCa^ROL(BCi, 1); - Di = BCe^ROL(BCo, 1); - Do = BCi^ROL(BCu, 1); - Du = BCo^ROL(BCa, 1); - - Aba ^= Da; - BCa = Aba; - Age ^= De; - BCe = ROL(Age, 44); - Aki ^= Di; - BCi = ROL(Aki, 43); - Amo ^= Do; - BCo = ROL(Amo, 21); - Asu ^= Du; - BCu = ROL(Asu, 14); - Eba = BCa ^((~BCe)& BCi ); - Eba ^= (uint64_t)KeccakF_RoundConstants[round]; - Ebe = BCe ^((~BCi)& BCo ); - Ebi = BCi ^((~BCo)& BCu ); - Ebo = BCo ^((~BCu)& BCa ); - Ebu = BCu ^((~BCa)& BCe ); - - Abo ^= Do; - BCa = ROL(Abo, 28); - Agu ^= Du; - BCe = ROL(Agu, 20); - Aka ^= Da; - BCi = ROL(Aka, 3); - Ame ^= De; - BCo = ROL(Ame, 45); - Asi ^= Di; - BCu = ROL(Asi, 61); - Ega = BCa ^((~BCe)& BCi ); - Ege = BCe ^((~BCi)& BCo ); - Egi = BCi ^((~BCo)& BCu ); - Ego = BCo ^((~BCu)& BCa ); - Egu = BCu ^((~BCa)& BCe ); - - Abe ^= De; - BCa = ROL(Abe, 1); - Agi ^= Di; - BCe = ROL(Agi, 6); - Ako ^= Do; - BCi = ROL(Ako, 25); - Amu ^= Du; - BCo = ROL(Amu, 8); - Asa ^= Da; - BCu = ROL(Asa, 18); - Eka = BCa ^((~BCe)& BCi ); - Eke = BCe ^((~BCi)& BCo ); - Eki = BCi ^((~BCo)& BCu ); - Eko = BCo ^((~BCu)& BCa ); - Eku = BCu ^((~BCa)& BCe ); - - Abu ^= Du; - BCa = ROL(Abu, 27); - Aga ^= Da; - BCe = ROL(Aga, 36); - Ake ^= De; - BCi = ROL(Ake, 10); - Ami ^= Di; - BCo = ROL(Ami, 15); - Aso ^= Do; - BCu = ROL(Aso, 56); - Ema = BCa ^((~BCe)& BCi ); - Eme = BCe ^((~BCi)& BCo ); - Emi = BCi ^((~BCo)& BCu ); - Emo = BCo ^((~BCu)& BCa ); - Emu = BCu ^((~BCa)& BCe ); - - Abi ^= Di; - BCa = ROL(Abi, 62); - Ago ^= Do; - BCe = ROL(Ago, 55); - Aku ^= Du; - BCi = ROL(Aku, 39); - Ama ^= Da; - BCo = ROL(Ama, 41); - Ase ^= De; - BCu = ROL(Ase, 2); - Esa = BCa ^((~BCe)& BCi ); - Ese = BCe ^((~BCi)& BCo ); - Esi = BCi ^((~BCo)& BCu ); - Eso = BCo ^((~BCu)& BCa ); - Esu = BCu ^((~BCa)& BCe ); - - // prepareTheta - BCa = Eba^Ega^Eka^Ema^Esa; - BCe = Ebe^Ege^Eke^Eme^Ese; - BCi = Ebi^Egi^Eki^Emi^Esi; - BCo = Ebo^Ego^Eko^Emo^Eso; - BCu = Ebu^Egu^Eku^Emu^Esu; - - //thetaRhoPiChiIotaPrepareTheta(round+1, E, A) - Da = BCu^ROL(BCe, 1); - De = BCa^ROL(BCi, 1); - Di = BCe^ROL(BCo, 1); - Do = BCi^ROL(BCu, 1); - Du = BCo^ROL(BCa, 1); - - Eba ^= Da; - BCa = Eba; - Ege ^= De; - BCe = ROL(Ege, 44); - Eki ^= Di; - BCi = ROL(Eki, 43); - Emo ^= Do; - BCo = ROL(Emo, 21); - Esu ^= Du; - BCu = ROL(Esu, 14); - Aba = BCa ^((~BCe)& BCi ); - Aba ^= (uint64_t)KeccakF_RoundConstants[round+1]; - Abe = BCe ^((~BCi)& BCo ); - Abi = BCi ^((~BCo)& BCu ); - Abo = BCo ^((~BCu)& BCa ); - Abu = BCu ^((~BCa)& BCe ); - - Ebo ^= Do; - BCa = ROL(Ebo, 28); - Egu ^= Du; - BCe = ROL(Egu, 20); - Eka ^= Da; - BCi = ROL(Eka, 3); - Eme ^= De; - BCo = ROL(Eme, 45); - Esi ^= Di; - BCu = ROL(Esi, 61); - Aga = BCa ^((~BCe)& BCi ); - Age = BCe ^((~BCi)& BCo ); - Agi = BCi ^((~BCo)& BCu ); - Ago = BCo ^((~BCu)& BCa ); - Agu = BCu ^((~BCa)& BCe ); - - Ebe ^= De; - BCa = ROL(Ebe, 1); - Egi ^= Di; - BCe = ROL(Egi, 6); - Eko ^= Do; - BCi = ROL(Eko, 25); - Emu ^= Du; - BCo = ROL(Emu, 8); - Esa ^= Da; - BCu = ROL(Esa, 18); - Aka = BCa ^((~BCe)& BCi ); - Ake = BCe ^((~BCi)& BCo ); - Aki = BCi ^((~BCo)& BCu ); - Ako = BCo ^((~BCu)& BCa ); - Aku = BCu ^((~BCa)& BCe ); - - Ebu ^= Du; - BCa = ROL(Ebu, 27); - Ega ^= Da; - BCe = ROL(Ega, 36); - Eke ^= De; - BCi = ROL(Eke, 10); - Emi ^= Di; - BCo = ROL(Emi, 15); - Eso ^= Do; - BCu = ROL(Eso, 56); - Ama = BCa ^((~BCe)& BCi ); - Ame = BCe ^((~BCi)& BCo ); - Ami = BCi ^((~BCo)& BCu ); - Amo = BCo ^((~BCu)& BCa ); - Amu = BCu ^((~BCa)& BCe ); - - Ebi ^= Di; - BCa = ROL(Ebi, 62); - Ego ^= Do; - BCe = ROL(Ego, 55); - Eku ^= Du; - BCi = ROL(Eku, 39); - Ema ^= Da; - BCo = ROL(Ema, 41); - Ese ^= De; - BCu = ROL(Ese, 2); - Asa = BCa ^((~BCe)& BCi ); - Ase = BCe ^((~BCi)& BCo ); - Asi = BCi ^((~BCo)& BCu ); - Aso = BCo ^((~BCu)& BCa ); - Asu = BCu ^((~BCa)& BCe ); - } - - //copyToState(state, A) - state[ 0] = Aba; - state[ 1] = Abe; - state[ 2] = Abi; - state[ 3] = Abo; - state[ 4] = Abu; - state[ 5] = Aga; - state[ 6] = Age; - state[ 7] = Agi; - state[ 8] = Ago; - state[ 9] = Agu; - state[10] = Aka; - state[11] = Ake; - state[12] = Aki; - state[13] = Ako; - state[14] = Aku; - state[15] = Ama; - state[16] = Ame; - state[17] = Ami; - state[18] = Amo; - state[19] = Amu; - state[20] = Asa; - state[21] = Ase; - state[22] = Asi; - state[23] = Aso; - state[24] = Asu; - - #undef round -} - -#include <string.h> -#define MIN(a, b) ((a) < (b) ? (a) : (b)) - - -/************************************************* -* Name: keccak_absorb -* -* Description: Absorb step of Keccak; -* non-incremental, starts by zeroeing the state. -* -* Arguments: - uint64_t *s: pointer to (uninitialized) output Keccak state -* - unsigned int r: rate in bytes (e.g., 168 for SHAKE128) -* - const unsigned char *m: pointer to input to be absorbed into s -* - unsigned long long mlen: length of input in bytes -* - unsigned char p: domain-separation byte for different Keccak-derived functions -**************************************************/ -static void keccak_absorb(uint64_t *s, - unsigned int r, - const unsigned char *m, unsigned long long int mlen, - unsigned char p) -{ - unsigned long long i; - unsigned char t[200]; - - // Zero state - for (i = 0; i < 25; ++i) - s[i] = 0; - - while (mlen >= r) - { - for (i = 0; i < r / 8; ++i) - s[i] ^= load64(m + 8 * i); - - KeccakF1600_StatePermute(s); - mlen -= r; - m += r; - } - - for (i = 0; i < r; ++i) - t[i] = 0; - for (i = 0; i < mlen; ++i) - t[i] = m[i]; - t[i] = p; - t[r - 1] |= 128; - for (i = 0; i < r / 8; ++i) - s[i] ^= load64(t + 8 * i); -} - - -/************************************************* -* Name: keccak_squeezeblocks -* -* Description: Squeeze step of Keccak. Squeezes full blocks of r bytes each. -* Modifies the state. Can be called multiple times to keep squeezing, -* i.e., is incremental. -* -* Arguments: - unsigned char *h: pointer to output blocks -* - unsigned long long int nblocks: number of blocks to be squeezed (written to h) -* - uint64_t *s: pointer to in/output Keccak state -* - unsigned int r: rate in bytes (e.g., 168 for SHAKE128) -**************************************************/ -static void keccak_squeezeblocks(unsigned char *h, unsigned long long int nblocks, - uint64_t *s, - unsigned int r) -{ - unsigned int i; - while(nblocks > 0) - { - KeccakF1600_StatePermute(s); - for(i=0;i<(r>>3);i++) - { - store64(h+8*i, s[i]); - } - h += r; - nblocks--; - } -} - - -/************************************************* -* Name: shake128_absorb -* -* Description: Absorb step of the SHAKE128 XOF. -* non-incremental, starts by zeroeing the state. -* -* Arguments: - uint64_t *s: pointer to (uninitialized) output Keccak state -* - const unsigned char *input: pointer to input to be absorbed into s -* - unsigned long long inputByteLen: length of input in bytes -**************************************************/ -void shake128_absorb(uint64_t *s, const unsigned char *input, unsigned int inputByteLen) -{ - keccak_absorb(s, SHAKE128_RATE, input, inputByteLen, 0x1F); -} - -/************************************************* -* Name: shake128_squeezeblocks -* -* Description: Squeeze step of SHAKE128 XOF. Squeezes full blocks of SHAKE128_RATE bytes each. -* Modifies the state. Can be called multiple times to keep squeezing, -* i.e., is incremental. -* -* Arguments: - unsigned char *output: pointer to output blocks -* - unsigned long long nblocks: number of blocks to be squeezed (written to output) -* - uint64_t *s: pointer to in/output Keccak state -**************************************************/ -void shake128_squeezeblocks(unsigned char *output, unsigned long long nblocks, uint64_t *s) -{ - keccak_squeezeblocks(output, nblocks, s, SHAKE128_RATE); -} - -/************************************************* -* Name: shake256 -* -* Description: SHAKE256 XOF with non-incremental API -* -* Arguments: - unsigned char *output: pointer to output -* - unsigned long long outlen: requested output length in bytes - - const unsigned char *input: pointer to input - - unsigned long long inlen: length of input in bytes -**************************************************/ -void shake256(unsigned char *output, unsigned long long outlen, - const unsigned char *input, unsigned long long inlen) -{ - uint64_t s[25]; - unsigned char t[SHAKE256_RATE]; - unsigned long long nblocks = outlen/SHAKE256_RATE; - size_t i; - - /* Absorb input */ - keccak_absorb(s, SHAKE256_RATE, input, inlen, 0x1F); - - /* Squeeze output */ - keccak_squeezeblocks(output, nblocks, s, SHAKE256_RATE); - - output+=nblocks*SHAKE256_RATE; - outlen-=nblocks*SHAKE256_RATE; - - if(outlen) - { - keccak_squeezeblocks(t, 1, s, SHAKE256_RATE); - for(i=0;i<outlen;i++) - output[i] = t[i]; - } -} - -/************************************************* -* Name: sha3_256 -* -* Description: SHA3-256 with non-incremental API -* -* Arguments: - unsigned char *output: pointer to output (32 bytes) -* - const unsigned char *input: pointer to input -* - unsigned long long inlen: length of input in bytes -**************************************************/ -void sha3_256(unsigned char *output, const unsigned char *input, unsigned long long inlen) -{ - uint64_t s[25]; - unsigned char t[SHA3_256_RATE]; - size_t i; - - /* Absorb input */ - keccak_absorb(s, SHA3_256_RATE, input, inlen, 0x06); - - /* Squeeze output */ - keccak_squeezeblocks(t, 1, s, SHA3_256_RATE); - - for(i=0;i<32;i++) - output[i] = t[i]; -} - -/************************************************* -* Name: sha3_512 -* -* Description: SHA3-512 with non-incremental API -* -* Arguments: - unsigned char *output: pointer to output (64 bytes) -* - const unsigned char *input: pointer to input -* - unsigned long long inlen: length of input in bytes -**************************************************/ -void sha3_512(unsigned char *output, const unsigned char *input, unsigned long long inlen) -{ - uint64_t s[25]; - unsigned char t[SHA3_512_RATE]; - size_t i; - - /* Absorb input */ - keccak_absorb(s, SHA3_512_RATE, input, inlen, 0x06); - - /* Squeeze output */ - keccak_squeezeblocks(t, 1, s, SHA3_512_RATE); - - for(i=0;i<64;i++) - output[i] = t[i]; -} - diff --git a/code/jasmin/mlkem_ref/fips202.h b/code/jasmin/mlkem_ref/fips202.h deleted file mode 100644 index 25f6a15c..00000000 --- a/code/jasmin/mlkem_ref/fips202.h +++ /dev/null @@ -1,28 +0,0 @@ -#ifndef FIPS202_H -#define FIPS202_H - -#include <stdint.h> - -#define SHAKE128_RATE 168 -#define SHAKE256_RATE 136 -#define SHA3_256_RATE 136 -#define SHA3_512_RATE 72 - -void shake128_absorb(uint64_t *s, const unsigned char *input, unsigned int inputByteLen); -void shake128_squeezeblocks(unsigned char *output, unsigned long long nblocks, uint64_t *s); - -void shake256(unsigned char *output, unsigned long long outlen, const unsigned char *input, unsigned long long inlen); - -void sha3_256(unsigned char *output, const unsigned char *input, unsigned long long inlen); -void sha3_512(unsigned char *output, const unsigned char *input, unsigned long long inlen); - - - -void shake256_128_33_jazz(unsigned char *output, const unsigned char *input); -void sha3512_32_jazz(unsigned char *output, const unsigned char *input); - -void shake128_absorb34_jazz(uint64_t *s, const unsigned char *input); -void shake128_squeezeblock_jazz(unsigned char *output, uint64_t *s); - - -#endif diff --git a/code/jasmin/mlkem_ref/indcpa.c b/code/jasmin/mlkem_ref/indcpa.c deleted file mode 100644 index 4a35195f..00000000 --- a/code/jasmin/mlkem_ref/indcpa.c +++ /dev/null @@ -1,321 +0,0 @@ -#include <stdint.h> -#include "indcpa.h" -#include "poly.h" -#include "polyvec.h" -#include "ntt.h" -#include "symmetric.h" - -/************************************************* -* Name: pack_pk -* -* Description: Serialize the public key as concatenation of the -* serialized vector of polynomials pk -* and the public seed used to generate the matrix A. -* -* Arguments: unsigned char *r: pointer to the output serialized public key -* const poly *pk: pointer to the input public-key polynomial -* const unsigned char *seed: pointer to the input public seed -**************************************************/ -static void pack_pk(unsigned char *r, polyvec *pk, const unsigned char *seed) -{ - int i; - polyvec_tobytes(r, pk); - for(i=0;i<MLKEM_SYMBYTES;i++) - r[i+MLKEM_POLYVECBYTES] = seed[i]; -} - -/************************************************* -* Name: unpack_pk -* -* Description: De-serialize public key from a byte array; -* approximate inverse of pack_pk -* -* Arguments: - polyvec *pk: pointer to output public-key vector of polynomials -* - unsigned char *seed: pointer to output seed to generate matrix A -* - const unsigned char *packedpk: pointer to input serialized public key -**************************************************/ -static void unpack_pk(polyvec *pk, unsigned char *seed, const unsigned char *packedpk) -{ - int i; - polyvec_frombytes(pk, packedpk); - for(i=0;i<MLKEM_SYMBYTES;i++) - seed[i] = packedpk[i+MLKEM_POLYVECBYTES]; -} - -/************************************************* -* Name: pack_sk -* -* Description: Serialize the secret key -* -* Arguments: - unsigned char *r: pointer to output serialized secret key -* - const polyvec *sk: pointer to input vector of polynomials (secret key) -**************************************************/ -static void pack_sk(unsigned char *r, polyvec *sk) -{ - polyvec_tobytes(r, sk); -} - -/************************************************* -* Name: unpack_sk -* -* Description: De-serialize the secret key; -* inverse of pack_sk -* -* Arguments: - polyvec *sk: pointer to output vector of polynomials (secret key) -* - const unsigned char *packedsk: pointer to input serialized secret key -**************************************************/ -static void unpack_sk(polyvec *sk, const unsigned char *packedsk) -{ - polyvec_frombytes(sk, packedsk); -} - -/************************************************* -* Name: pack_ciphertext -* -* Description: Serialize the ciphertext as concatenation of the -* compressed and serialized vector of polynomials b -* and the compressed and serialized polynomial v -* -* Arguments: unsigned char *r: pointer to the output serialized ciphertext -* const poly *pk: pointer to the input vector of polynomials b -* const unsigned char *seed: pointer to the input polynomial v -**************************************************/ -static void pack_ciphertext(unsigned char *r, polyvec *b, poly *v) -{ - polyvec_compress(r, b); - poly_compress(r+MLKEM_POLYVECCOMPRESSEDBYTES, v); -} - -/************************************************* -* Name: unpack_ciphertext -* -* Description: De-serialize and decompress ciphertext from a byte array; -* approximate inverse of pack_ciphertext -* -* Arguments: - polyvec *b: pointer to the output vector of polynomials b -* - poly *v: pointer to the output polynomial v -* - const unsigned char *c: pointer to the input serialized ciphertext -**************************************************/ -static void unpack_ciphertext(polyvec *b, poly *v, const unsigned char *c) -{ - polyvec_decompress(b, c); - poly_decompress(v, c+MLKEM_POLYVECCOMPRESSEDBYTES); -} - -/************************************************* -* Name: rej_uniform -* -* Description: Run rejection sampling on uniform random bytes to generate -* uniform random integers mod q -* -* Arguments: - int16_t *r: pointer to output buffer -* - unsigned int len: requested number of 16-bit integers (uniform mod q) -* - const unsigned char *buf: pointer to input buffer (assumed to be uniform random bytes) -* - unsigned int buflen: length of input buffer in bytes -* -* Returns number of sampled 16-bit integers (at most len) -**************************************************/ -static unsigned int rej_uniform(int16_t *r, unsigned int len, const unsigned char *buf, unsigned int buflen) -{ - unsigned int ctr, pos; - uint16_t val1, val2; - - ctr = pos = 0; - while(ctr < len && pos + 3 <= buflen) - { - val1 = buf[pos] | ((uint16_t)(buf[pos+1] & 0x0F) << 8); - pos ++; - val2 = ((buf[pos] >> 4) | ((uint16_t)buf[pos+1] << 4)); - pos += 2; - - if(val1 < MLKEM_Q) - { - r[ctr++] = (int16_t)val1; - } - - if(val2 < MLKEM_Q && ctr < len) { - r[ctr++] = (int16_t)val2; - } - } - - return ctr; -} - -#define gen_a(A,B) gen_matrix(A,B,0) -#define gen_at(A,B) gen_matrix(A,B,1) - -/************************************************* -* Name: gen_matrix -* -* Description: Deterministically generate matrix A (or the transpose of A) -* from a seed. Entries of the matrix are polynomials that look -* uniformly random. Performs rejection sampling on output of -* a XOF -* -* Arguments: - polyvec *a: pointer to ouptput matrix A -* - const unsigned char *seed: pointer to input seed -* - int transposed: boolean deciding whether A or A^T is generated -**************************************************/ -static void gen_matrix(polyvec *a, const unsigned char *seed, int transposed) // Not static for benchmarking -{ - unsigned int ctr, i, j; - const unsigned int maxnblocks=(530+XOF_BLOCKBYTES)/XOF_BLOCKBYTES; /* 530 is expected number of required bytes */ - unsigned char buf[XOF_BLOCKBYTES*maxnblocks+1]; - xof_state state; - - for(i=0;i<MLKEM_K;i++) - { - for(j=0;j<MLKEM_K;j++) - { - if(transposed) { - xof_absorb(&state, seed, i, j); - } - else { - xof_absorb(&state, seed, j, i); - } - - xof_squeezeblocks(buf, maxnblocks, &state); - ctr = rej_uniform(a[i].vec[j].coeffs, MLKEM_N, buf, maxnblocks*XOF_BLOCKBYTES); - - while(ctr < MLKEM_N) - { - xof_squeezeblocks(buf, 1, &state); - ctr += rej_uniform(a[i].vec[j].coeffs + ctr, MLKEM_N - ctr, buf, XOF_BLOCKBYTES); - } - } - } -} - -/************************************************* -* Name: indcpa_keypair -* -* Description: Generates public and private key for the CPA-secure -* public-key encryption scheme underlying Kyber -* -* Arguments: - unsigned char *pk: pointer to output public key (of length MLKEM_INDCPA_PUBLICKEYBYTES bytes) -* - unsigned char *sk: pointer to output private key (of length MLKEM_INDCPA_SECRETKEYBYTES bytes) -**************************************************/ -void indcpa_keypair(unsigned char *pk, - unsigned char *sk, - const unsigned char *randomness) -{ - polyvec a[MLKEM_K], e, pkpv, skpv; - unsigned char buf[2*MLKEM_SYMBYTES]; - unsigned char *publicseed = buf; - unsigned char *noiseseed = buf+MLKEM_SYMBYTES; - int i; - unsigned char nonce=0; - - //randombytes(buf, MLKEM_SYMBYTES); - for(i=0;i<MLKEM_SYMBYTES;i++) - buf[i] = randomness[i]; - - hash_g(buf, buf, MLKEM_SYMBYTES); - - gen_a(a, publicseed); - - for(i=0;i<MLKEM_K;i++) - poly_getnoise(skpv.vec+i, noiseseed, nonce++); - for(i=0;i<MLKEM_K;i++) - poly_getnoise(e.vec+i, noiseseed, nonce++); - - polyvec_ntt(&skpv); - polyvec_ntt(&e); - - // matrix-vector multiplication - for(i=0;i<MLKEM_K;i++) { - polyvec_pointwise_acc(&pkpv.vec[i], &a[i], &skpv); - poly_frommont(&pkpv.vec[i]); - } - - polyvec_add(&pkpv, &pkpv, &e); - polyvec_reduce(&pkpv); - - pack_sk(sk, &skpv); - pack_pk(pk, &pkpv, publicseed); -} - -/************************************************* -* Name: indcpa_enc -* -* Description: Encryption function of the CPA-secure -* public-key encryption scheme underlying Kyber. -* -* Arguments: - unsigned char *c: pointer to output ciphertext (of length MLKEM_INDCPA_BYTES bytes) -* - const unsigned char *m: pointer to input message (of length MLKEM_INDCPA_MSGBYTES bytes) -* - const unsigned char *pk: pointer to input public key (of length MLKEM_INDCPA_PUBLICKEYBYTES bytes) -* - const unsigned char *coin: pointer to input random coins used as seed (of length MLKEM_SYMBYTES bytes) -* to deterministically generate all randomness -**************************************************/ -void indcpa_enc(unsigned char *c, - const unsigned char *m, - const unsigned char *pk, - const unsigned char *coins) -{ - polyvec sp, pkpv, ep, at[MLKEM_K], bp; - poly v, k, epp; - unsigned char seed[MLKEM_SYMBYTES]; - int i; - unsigned char nonce=0; - - unpack_pk(&pkpv, seed, pk); - poly_frommsg(&k, m); - gen_at(at, seed); - - for(i=0;i<MLKEM_K;i++) - poly_getnoise(sp.vec+i, coins, nonce++); - for(i=0;i<MLKEM_K;i++) - poly_getnoise(ep.vec+i, coins, nonce++); - poly_getnoise(&epp, coins, nonce++); - - polyvec_ntt(&sp); - - // matrix-vector multiplication - for(i=0;i<MLKEM_K;i++) - polyvec_pointwise_acc(&bp.vec[i], &at[i], &sp); - - polyvec_pointwise_acc(&v, &pkpv, &sp); - - polyvec_invntt(&bp); - poly_invntt(&v); - - polyvec_add(&bp, &bp, &ep); - poly_add(&v, &v, &epp); - poly_add(&v, &v, &k); - polyvec_reduce(&bp); - poly_reduce(&v); - - pack_ciphertext(c, &bp, &v); -} - -/************************************************* -* Name: indcpa_dec -* -* Description: Decryption function of the CPA-secure -* public-key encryption scheme underlying Kyber. -* -* Arguments: - unsigned char *m: pointer to output decrypted message (of length MLKEM_INDCPA_MSGBYTES) -* - const unsigned char *c: pointer to input ciphertext (of length MLKEM_INDCPA_BYTES) -* - const unsigned char *sk: pointer to input secret key (of length MLKEM_INDCPA_SECRETKEYBYTES) -**************************************************/ -void indcpa_dec(unsigned char *m, - const unsigned char *c, - const unsigned char *sk) -{ - polyvec bp, skpv; - poly v, mp; - - unpack_ciphertext(&bp, &v, c); - unpack_sk(&skpv, sk); - - - polyvec_ntt(&bp); - polyvec_pointwise_acc(&mp, &skpv, &bp); - poly_invntt(&mp); - - poly_sub(&mp, &v, &mp); - poly_reduce(&mp); - -// poly_tobytes(m, &mp); - poly_tomsg(m, &mp); -} diff --git a/code/jasmin/mlkem_ref/indcpa.h b/code/jasmin/mlkem_ref/indcpa.h deleted file mode 100644 index f07fc16a..00000000 --- a/code/jasmin/mlkem_ref/indcpa.h +++ /dev/null @@ -1,36 +0,0 @@ -#ifndef INDCPA_H -#define INDCPA_H - -#include <stdint.h> - -void indcpa_keypair(unsigned char *pk, - unsigned char *sk, - const unsigned char *randomness); - -void indcpa_enc(unsigned char *c, - const unsigned char *m, - const unsigned char *pk, - const unsigned char *coins); - -void indcpa_dec(unsigned char *m, - const unsigned char *c, - const unsigned char *sk); - - - - -void indcpa_keypair_jazz(unsigned char *pk, - unsigned char *sk, - const unsigned char *randomness); - -void indcpa_enc_jazz(unsigned char *c, - const unsigned char *m, - const unsigned char *pk, - const unsigned char *coins); - -void indcpa_dec_jazz(unsigned char *m, - const unsigned char *c, - const unsigned char *sk); - - -#endif diff --git a/code/jasmin/mlkem_ref/kem.c b/code/jasmin/mlkem_ref/kem.c deleted file mode 100644 index 9409e584..00000000 --- a/code/jasmin/mlkem_ref/kem.c +++ /dev/null @@ -1,145 +0,0 @@ -#include <stdint.h> -#include <stddef.h> -#include <string.h> -#include "kem.h" -#include "indcpa.h" -#include "symmetric.h" - -/************************************************* -* Name: verify -* -* Description: Compare two arrays for equality in constant time. -* -* Arguments: const uint8_t *a: pointer to first byte array -* const uint8_t *b: pointer to second byte array -* size_t len: length of the byte arrays -* -* Returns 0 if the byte arrays are equal, 1 otherwise -**************************************************/ -uint64_t verify(const uint8_t *a, const uint8_t *b, size_t len) -{ - size_t i; - uint8_t r; - - r = 0; - for(i=0; i < len; i ++) - r |= a[i] ^ b[i]; - - return (-(uint64_t)r) >> 63; -} - -/************************************************* -* Name: cmov -* -* Description: Copy len bytes from x to r if b is 1; -* don't modify x if b is 0. Requires b to be in {0,1}; -* assumes two's complement representation of negative integers. -* Runs in constant time. -* -* Arguments: uint8_t *r: pointer to output byte array -* const uint8_t *x: pointer to input byte array -* size_t len: Amount of bytes to be copied -* uint8_t b: Condition bit; has to be in {0,1} -**************************************************/ -void cmov(uint8_t *r, const uint8_t *x, size_t len, uint8_t b) -{ - size_t i; - - b = -b; - for(i=0; i < len; i++) - r[i] ^= b & (r[i] ^ x[i]); -} - -/************************************************* -* Name: crypto_kem_keypair -* -* Description: Generates public and private key for the CCA-secure -* Kyber key encapsulation mechanism -* -* Arguments: - unsigned char *pk: pointer to output public key -* - unsigned char *sk: pointer to output private key -**************************************************/ -void crypto_kem_keypair(unsigned char *pk, - unsigned char *sk, - const unsigned char *randomness) -{ - indcpa_keypair(pk, sk, randomness); - - memcpy(sk+MLKEM_INDCPA_SECRETKEYBYTES, pk, MLKEM_INDCPA_PUBLICKEYBYTES); - - hash_h(sk+MLKEM_SECRETKEYBYTES-2*MLKEM_SYMBYTES, pk, MLKEM_PUBLICKEYBYTES); - - memcpy(sk+MLKEM_SECRETKEYBYTES-MLKEM_SYMBYTES, randomness + MLKEM_SYMBYTES, MLKEM_SYMBYTES); -} - -/************************************************* -* Name: crypto_kem_enc -* -* Description: Generates cipher text and shared -* secret for given public key -* -* Arguments: - unsigned char *c: pointer to output ciphertext (of length MLKEM_INDCPA_BYTES bytes) -* - const unsigned char *m: pointer to input message (of length MLKEM_INDCPA_MSGBYTES bytes) -* - const unsigned char *pk: pointer to input public key (of length MLKEM_INDCPA_PUBLICKEYBYTES bytes) -* - const unsigned char *coin: pointer to input random coins used as seed (of length MLKEM_SYMBYTES bytes) -* to deterministically generate all randomness -**************************************************/ -void crypto_kem_enc(unsigned char *ct, - unsigned char *ss, - const unsigned char *pk, - const unsigned char *coins) -{ - uint8_t buf[2*MLKEM_SYMBYTES]; - /* Will contain key, coins */ - uint8_t kr[2*MLKEM_SYMBYTES]; - - memcpy(buf, coins, MLKEM_SYMBYTES); - - /* Multitarget countermeasure for coins + contributory KEM */ - hash_h(buf+MLKEM_SYMBYTES, pk, MLKEM_PUBLICKEYBYTES); - hash_g(kr, buf, 2*MLKEM_SYMBYTES); - - /* coins are in kr+MLKEM_SYMBYTES */ - indcpa_enc(ct, buf, pk, kr+MLKEM_SYMBYTES); - - memcpy(ss,kr,MLKEM_SYMBYTES); -} - -/************************************************* -* Name: crypto_kem_dec -* -* Description: Generates shared secret for given -* cipher text and private key -* -* Arguments: - unsigned char *m: pointer to output decrypted message (of length MLKEM_INDCPA_MSGBYTES) -* - const unsigned char *c: pointer to input ciphertext (of length MLKEM_INDCPA_BYTES) -* - const unsigned char *sk: pointer to input secret key (of length MLKEM_INDCPA_SECRETKEYBYTES) -**************************************************/ -void crypto_kem_dec(uint8_t *ss, - const uint8_t *ct, - const uint8_t *sk) -{ - int fail; - uint8_t buf[2*MLKEM_SYMBYTES]; - /* Will contain key, coins */ - uint8_t kr[2*MLKEM_SYMBYTES]; - uint8_t cmp[MLKEM_CIPHERTEXTBYTES+MLKEM_SYMBYTES]; - const uint8_t *pk = sk+MLKEM_INDCPA_SECRETKEYBYTES; - - indcpa_dec(buf, ct, sk); - - /* Multitarget countermeasure for coins + contributory KEM */ - memcpy(buf+MLKEM_SYMBYTES, sk+MLKEM_SECRETKEYBYTES-2*MLKEM_SYMBYTES, MLKEM_SYMBYTES); - hash_g(kr, buf, 2*MLKEM_SYMBYTES); - - /* coins are in kr+MLKEM_SYMBYTES */ - indcpa_enc(cmp, buf, pk, kr+MLKEM_SYMBYTES); - - fail = verify(ct, cmp, MLKEM_CIPHERTEXTBYTES); - - /* Compute rejection key */ - rkprf(ss,sk+MLKEM_SECRETKEYBYTES-MLKEM_SYMBYTES,ct); - - /* Copy true key to return buffer if fail is false */ - cmov(ss,kr,MLKEM_SYMBYTES,!fail); -} diff --git a/code/jasmin/mlkem_ref/kem.h b/code/jasmin/mlkem_ref/kem.h deleted file mode 100644 index 87d42062..00000000 --- a/code/jasmin/mlkem_ref/kem.h +++ /dev/null @@ -1,41 +0,0 @@ -#ifndef CRYPTO_KEM_H -#define CRYPTO_KEM_H - -#include <stdint.h> - -void crypto_kem_keypair(unsigned char *pk, - unsigned char *sk, - const unsigned char *randomness); - -void crypto_kem_enc(unsigned char *c, - unsigned char *m, - const unsigned char *pk, - const unsigned char *coins); - -void crypto_kem_dec(unsigned char *m, - const unsigned char *c, - const unsigned char *sk); - -void jade_kem_mlkem_mlkem768_amd64_ref_keypair_derand(unsigned char *pk, - unsigned char *sk, - const unsigned char *randomness); - -void jade_kem_mlkem_mlkem768_amd64_ref_enc_derand(unsigned char *c, - const unsigned char *m, - const unsigned char *pk, - const unsigned char *coins); - - -void jade_kem_mlkem_mlkem768_amd64_ref_keypair(unsigned char *pk, - unsigned char *sk); - -void jade_kem_mlkem_mlkem768_amd64_ref_enc(unsigned char *c, - const unsigned char *m, - const unsigned char *pk); - -void jade_kem_mlkem_mlkem768_amd64_ref_dec(unsigned char *m, - const unsigned char *c, - const unsigned char *sk); - - -#endif diff --git a/code/jasmin/mlkem_ref/ntt.c b/code/jasmin/mlkem_ref/ntt.c deleted file mode 100644 index 7be9370e..00000000 --- a/code/jasmin/mlkem_ref/ntt.c +++ /dev/null @@ -1,152 +0,0 @@ -#include <stdint.h> -#include "params.h" -#include "ntt.h" -#include "reduce.h" - -/* Code to generate zetas and zetas_inv used in the number-theoretic transform: - -#define MLKEM_ROOT_OF_UNITY 17 - -static const uint16_t tree[128] = { - 0, 64, 32, 96, 16, 80, 48, 112, 8, 72, 40, 104, 24, 88, 56, 120, - 4, 68, 36, 100, 20, 84, 52, 116, 12, 76, 44, 108, 28, 92, 60, 124, - 2, 66, 34, 98, 18, 82, 50, 114, 10, 74, 42, 106, 26, 90, 58, 122, - 6, 70, 38, 102, 22, 86, 54, 118, 14, 78, 46, 110, 30, 94, 62, 126, - 1, 65, 33, 97, 17, 81, 49, 113, 9, 73, 41, 105, 25, 89, 57, 121, - 5, 69, 37, 101, 21, 85, 53, 117, 13, 77, 45, 109, 29, 93, 61, 125, - 3, 67, 35, 99, 19, 83, 51, 115, 11, 75, 43, 107, 27, 91, 59, 123, - 7, 71, 39, 103, 23, 87, 55, 119, 15, 79, 47, 111, 31, 95, 63, 127}; - - -static int16_t fqmul(int16_t a, int16_t b) { - return montgomery_reduce((int32_t)a*b); -} - -void init_ntt() { - unsigned int i, j, k; - int16_t tmp[128]; - - tmp[0] = MONT; - for(i = 1; i < 128; ++i) - tmp[i] = fqmul(tmp[i-1], MLKEM_ROOT_OF_UNITY*MONT % MLKEM_Q); - - for(i = 0; i < 128; ++i) - zetas[i] = tmp[tree[i]]; - - k = 0; - for(i = 64; i >= 1; i >>= 1) - for(j = i; j < 2*i; ++j) - zetas_inv[k++] = -tmp[128 - tree[j]]; - - zetas_inv[127] = MONT * (MONT * (MLKEM_Q - 1) * ((MLKEM_Q - 1)/128) % MLKEM_Q) % MLKEM_Q; -} - -*/ -int16_t zetas[128] = { - 2285, 2571, 2970, 1812, 1493, 1422, 287, 202, 3158, 622, 1577, 182, 962, 2127, 1855, 1468, - 573, 2004, 264, 383, 2500, 1458, 1727, 3199, 2648, 1017, 732, 608, 1787, 411, 3124, 1758, - 1223, 652, 2777, 1015, 2036, 1491, 3047, 1785, 516, 3321, 3009, 2663, 1711, 2167, 126, 1469, - 2476, 3239, 3058, 830, 107, 1908, 3082, 2378, 2931, 961, 1821, 2604, 448, 2264, 677, 2054, - 2226, 430, 555, 843, 2078, 871, 1550, 105, 422, 587, 177, 3094, 3038, 2869, 1574, 1653, - 3083, 778, 1159, 3182, 2552, 1483, 2727, 1119, 1739, 644, 2457, 349, 418, 329, 3173, 3254, - 817, 1097, 603, 610, 1322, 2044, 1864, 384, 2114, 3193, 1218, 1994, 2455, 220, 2142, 1670, - 2144, 1799, 2051, 794, 1819, 2475, 2459, 478, 3221, 3021, 996, 991, 958, 1869, 1522, 1628}; - -int16_t zetas_inv[128] = { - 1701, 1807, 1460, 2371, 2338, 2333, 308, 108, 2851, 870, 854, 1510, 2535, 1278, 1530, 1185, - 1659, 1187, 3109, 874, 1335, 2111, 136, 1215, 2945, 1465, 1285, 2007, 2719, 2726, 2232, 2512, - 75, 156, 3000, 2911, 2980, 872, 2685, 1590, 2210, 602, 1846, 777, 147, 2170, 2551, 246, - 1676, 1755, 460, 291, 235, 3152, 2742, 2907, 3224, 1779, 2458, 1251, 2486, 2774, 2899, 1103, - 1275, 2652, 1065, 2881, 725, 1508, 2368, 398, 951, 247, 1421, 3222, 2499, 271, 90, 853, - 1860, 3203, 1162, 1618, 666, 320, 8, 2813, 1544, 282, 1838, 1293, 2314, 552, 2677, 2106, - 1571, 205, 2918, 1542, 2721, 2597, 2312, 681, 130, 1602, 1871, 829, 2946, 3065, 1325, 2756, - 1861, 1474, 1202, 2367, 3147, 1752, 2707, 171, 3127, 3042, 1907, 1836, 1517, 359, 758, 1441}; - - -/************************************************* -* Name: fqmul -* -* Description: Multiplication followed by Montgomery reduction -* -* Arguments: - int16_t a: first factor -* - int16_t b: second factor -* -* Returns 16-bit integer congruent to a*b*R^{-1} mod q -**************************************************/ -static int16_t fqmul(int16_t a, int16_t b) { - return montgomery_reduce((int32_t)a*b); -} - -/************************************************* -* Name: ntt -* -* Description: Inplace number-theoretic transform (NTT) in Rq -* input is in standard order, output is in bitreversed order -* -* Arguments: - int16_t r[256]: pointer to input/output vector of elements of Zq -**************************************************/ -void ntt(int16_t r[256]) { - unsigned int len, start, j, k; - int16_t t, zeta; - - k = 1; - for(len = 128; len >= 2; len >>= 1) { - for(start = 0; start < 256; start = j + len) { - zeta = zetas[k++]; - for(j = start; j < start + len; ++j) { - t = fqmul(zeta, r[j + len]); - r[j + len] = r[j] - t; - r[j] = r[j] + t; - } - } - } -} - -/************************************************* -* Name: invntt -* -* Description: Inplace inverse number-theoretic transform in Rq -* input is in bitreversed order, output is in standard order -* -* Arguments: - int16_t r[256]: pointer to input/output vector of elements of Zq -**************************************************/ -void invntt(int16_t r[256]) { - unsigned int start, len, j, k; - int16_t t, zeta; - - k = 0; - for(len = 2; len <= 128; len <<= 1) { - for(start = 0; start < 256; start = j + len) { - zeta = zetas_inv[k++]; - for(j = start; j < start + len; ++j) { - t = r[j]; - r[j] = barrett_reduce(t + r[j + len]); - r[j + len] = t - r[j + len]; - r[j + len] = fqmul(zeta, r[j + len]); - } - } - } - - for(j = 0; j < 256; ++j) - r[j] = fqmul(r[j], zetas_inv[127]); -} - -/************************************************* -* Name: basemul -* -* Description: Multiplication of polynomials in Zq[X]/((X^2-zeta)) -* used for multiplication of elements in Rq in NTT domain -* -* Arguments: - int16_t r[2]: pointer to the output polynomial -* - const int16_t a[2]: pointer to the first factor -* - const int16_t b[2]: pointer to the second factor -* - int16_t zeta: integer defining the reduction polynomial -**************************************************/ -void basemul(int16_t r[2], const int16_t a[2], const int16_t b[2], int16_t zeta) { - r[0] = fqmul(a[1], b[1]); - r[0] = fqmul(r[0], zeta); - r[0] += fqmul(a[0], b[0]); - - r[1] = fqmul(a[0], b[1]); - r[1] += fqmul(a[1], b[0]); -} diff --git a/code/jasmin/mlkem_ref/ntt.h b/code/jasmin/mlkem_ref/ntt.h deleted file mode 100644 index f7ad4605..00000000 --- a/code/jasmin/mlkem_ref/ntt.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef NTT_H -#define NTT_H - -#include <stdint.h> - -extern int16_t zetas[128]; -extern int16_t zetas_inv[128]; - -void ntt(int16_t *poly); -void splitntt(int16_t *poly); - -void invntt(int16_t *poly); -void basemul(int16_t r[2], const int16_t a[2], const int16_t b[2], int16_t zeta); - -#endif diff --git a/code/jasmin/mlkem_ref/params.h b/code/jasmin/mlkem_ref/params.h deleted file mode 100644 index 99ca37a4..00000000 --- a/code/jasmin/mlkem_ref/params.h +++ /dev/null @@ -1,42 +0,0 @@ -#ifndef PARAMS_H -#define PARAMS_H - -#ifndef MLKEM_K -#define MLKEM_K 3 /* Change this for different security strengths */ -#endif - -/* Don't change parameters below this line */ - -#define MLKEM_N 256 -#define MLKEM_Q 3329 - -#define MLKEM_ETA 2 - -#define MLKEM_SYMBYTES 32 /* size in bytes of hashes, and seeds */ -#define MLKEM_SSBYTES 32 /* size in bytes of shared key */ - -#define MLKEM_POLYBYTES 384 -#define MLKEM_POLYVECBYTES (MLKEM_K * MLKEM_POLYBYTES) - - -#if MLKEM_K == 2 -#define MLKEM_POLYCOMPRESSEDBYTES 96 -#define MLKEM_POLYVECCOMPRESSEDBYTES (MLKEM_K * 320) -#elif MLKEM_K == 3 -#define MLKEM_POLYCOMPRESSEDBYTES 128 -#define MLKEM_POLYVECCOMPRESSEDBYTES (MLKEM_K * 320) -#elif MLKEM_K == 4 -#define MLKEM_POLYCOMPRESSEDBYTES 160 -#define MLKEM_POLYVECCOMPRESSEDBYTES (MLKEM_K * 352) -#endif - -#define MLKEM_INDCPA_MSGBYTES MLKEM_SYMBYTES -#define MLKEM_INDCPA_PUBLICKEYBYTES (MLKEM_POLYVECBYTES + MLKEM_SYMBYTES) -#define MLKEM_INDCPA_SECRETKEYBYTES (MLKEM_POLYVECBYTES) -#define MLKEM_INDCPA_BYTES (MLKEM_POLYVECCOMPRESSEDBYTES + MLKEM_POLYCOMPRESSEDBYTES) - -#define MLKEM_PUBLICKEYBYTES (MLKEM_INDCPA_PUBLICKEYBYTES) -#define MLKEM_SECRETKEYBYTES (MLKEM_INDCPA_SECRETKEYBYTES + MLKEM_INDCPA_PUBLICKEYBYTES + 2*MLKEM_SYMBYTES) /* 32 bytes of additional space to save H(pk) */ -#define MLKEM_CIPHERTEXTBYTES MLKEM_INDCPA_BYTES - -#endif diff --git a/code/jasmin/mlkem_ref/poly.c b/code/jasmin/mlkem_ref/poly.c deleted file mode 100644 index ed274703..00000000 --- a/code/jasmin/mlkem_ref/poly.c +++ /dev/null @@ -1,359 +0,0 @@ -#include <stdint.h> -#include "params.h" -#include "poly.h" -#include "ntt.h" -#include "reduce.h" -#include "cbd.h" -#include "symmetric.h" - -/************************************************* -* Name: poly_compress -* -* Description: Compression and subsequent serialization of a polynomial -* -* Arguments: - unsigned char *r: pointer to output byte array (needs space for MLKEM_POLYCOMPRESSEDBYTES bytes) -* - const poly *a: pointer to input polynomial -**************************************************/ -void poly_compress(unsigned char *r, poly *a) -{ - uint8_t t[8]; - int i,j,k=0; - - poly_csubq(a); - -#if (MLKEM_POLYCOMPRESSEDBYTES == 96) - for(i=0;i<MLKEM_N;i+=8) - { - for(j=0;j<8;j++) - t[j] = ((((uint32_t)a->coeffs[i+j] << 3) + MLKEM_Q/2) / MLKEM_Q) & 7; - - r[k] = t[0] | (t[1] << 3) | (t[2] << 6); - r[k+1] = (t[2] >> 2) | (t[3] << 1) | (t[4] << 4) | (t[5] << 7); - r[k+2] = (t[5] >> 1) | (t[6] << 2) | (t[7] << 5); - k += 3; - } -#elif (MLKEM_POLYCOMPRESSEDBYTES == 128) - for(i=0;i<MLKEM_N;i+=8) - { - for(j=0;j<8;j++) - t[j] = ((((uint32_t)a->coeffs[i+j] << 4) + MLKEM_Q/2) / MLKEM_Q) & 15; - - r[k] = t[0] | (t[1] << 4); - r[k+1] = t[2] | (t[3] << 4); - r[k+2] = t[4] | (t[5] << 4); - r[k+3] = t[6] | (t[7] << 4); - k += 4; - } -#elif (MLKEM_POLYCOMPRESSEDBYTES == 160) - for(i=0;i<MLKEM_N;i+=8) - { - for(j=0;j<8;j++) - t[j] = ((((uint32_t)a->coeffs[i+j] << 5) + MLKEM_Q/2) / MLKEM_Q) & 31; - - r[k] = t[0] | (t[1] << 5); - r[k+1] = (t[1] >> 3) | (t[2] << 2) | (t[3] << 7); - r[k+2] = (t[3] >> 1) | (t[4] << 4); - r[k+3] = (t[4] >> 4) | (t[5] << 1) | (t[6] << 6); - r[k+4] = (t[6] >> 2) | (t[7] << 3); - k += 5; - } -#else -#error "MLKEM_POLYCOMPRESSEDBYTES needs to be in {96, 128, 160}" -#endif -} - -/************************************************* -* Name: poly_decompress -* -* Description: De-serialization and subsequent decompression of a polynomial; -* approximate inverse of poly_compress -* -* Arguments: - poly *r: pointer to output polynomial -* - const unsigned char *a: pointer to input byte array (of length MLKEM_POLYCOMPRESSEDBYTES bytes) -**************************************************/ -void poly_decompress(poly *r, const unsigned char *a) -{ - int i; -#if (MLKEM_POLYCOMPRESSEDBYTES == 96) - for(i=0;i<MLKEM_N;i+=8) - { - r->coeffs[i+0] = (((a[0] & 7) * MLKEM_Q) + 4) >> 3; - r->coeffs[i+1] = ((((a[0] >> 3) & 7) * MLKEM_Q) + 4) >> 3; - r->coeffs[i+2] = ((((a[0] >> 6) | ((a[1] << 2) & 4)) * MLKEM_Q) + 4) >> 3; - r->coeffs[i+3] = ((((a[1] >> 1) & 7) * MLKEM_Q) + 4) >> 3; - r->coeffs[i+4] = ((((a[1] >> 4) & 7) * MLKEM_Q) + 4) >> 3; - r->coeffs[i+5] = ((((a[1] >> 7) | ((a[2] << 1) & 6)) * MLKEM_Q) + 4) >> 3; - r->coeffs[i+6] = ((((a[2] >> 2) & 7) * MLKEM_Q) + 4) >> 3; - r->coeffs[i+7] = ((((a[2] >> 5)) * MLKEM_Q) + 4) >> 3; - a += 3; - } -#elif (MLKEM_POLYCOMPRESSEDBYTES == 128) - for(i=0;i<MLKEM_N;i+=8) - { - r->coeffs[i+0] = (((a[0] & 15) * MLKEM_Q) + 8) >> 4; - r->coeffs[i+1] = (((a[0] >> 4) * MLKEM_Q) + 8) >> 4; - r->coeffs[i+2] = (((a[1] & 15) * MLKEM_Q) + 8) >> 4; - r->coeffs[i+3] = (((a[1] >> 4) * MLKEM_Q) + 8) >> 4; - r->coeffs[i+4] = (((a[2] & 15) * MLKEM_Q) + 8) >> 4; - r->coeffs[i+5] = (((a[2] >> 4) * MLKEM_Q) + 8) >> 4; - r->coeffs[i+6] = (((a[3] & 15) * MLKEM_Q) + 8) >> 4; - r->coeffs[i+7] = (((a[3] >> 4) * MLKEM_Q) + 8) >> 4; - a += 4; - } -#elif (MLKEM_POLYCOMPRESSEDBYTES == 160) - for(i=0;i<MLKEM_N;i+=8) - { - r->coeffs[i+0] = (((a[0] & 31) * MLKEM_Q) + 16) >> 5; - r->coeffs[i+1] = ((((a[0] >> 5) | ((a[1] & 3) << 3)) * MLKEM_Q) + 16) >> 5; - r->coeffs[i+2] = ((((a[1] >> 2) & 31) * MLKEM_Q) + 16) >> 5; - r->coeffs[i+3] = ((((a[1] >> 7) | ((a[2] & 15) << 1)) * MLKEM_Q) + 16) >> 5; - r->coeffs[i+4] = ((((a[2] >> 4) | ((a[3] & 1) << 4)) * MLKEM_Q) + 16) >> 5; - r->coeffs[i+5] = ((((a[3] >> 1) & 31) * MLKEM_Q) + 16) >> 5; - r->coeffs[i+6] = ((((a[3] >> 6) | ((a[4] & 7) << 2)) * MLKEM_Q) + 16) >> 5; - r->coeffs[i+7] = (((a[4] >> 3) * MLKEM_Q) + 16) >> 5; - a += 5; - } -#else -#error "MLKEM_POLYCOMPRESSEDBYTES needs to be in {96, 128, 160}" -#endif -} - -/************************************************* -* Name: poly_tobytes -* -* Description: Serialization of a polynomial -* -* Arguments: - unsigned char *r: pointer to output byte array (needs space for MLKEM_POLYBYTES bytes) -* - const poly *a: pointer to input polynomial -**************************************************/ -void poly_tobytes(unsigned char *r, poly *a) -{ - int i; - uint16_t t0, t1; - - poly_csubq(a); - - for(i=0;i<MLKEM_N/2;i++){ - t0 = a->coeffs[2*i]; - t1 = a->coeffs[2*i+1]; - r[3*i] = t0 & 0xff; - r[3*i+1] = (t0 >> 8) | ((t1 & 0xf) << 4); - r[3*i+2] = t1 >> 4; - } -} - -/************************************************* -* Name: poly_frombytes -* -* Description: De-serialization of a polynomial; -* inverse of poly_tobytes -* -* Arguments: - poly *r: pointer to output polynomial -* - const unsigned char *a: pointer to input byte array (of MLKEM_POLYBYTES bytes) -**************************************************/ -void poly_frombytes(poly *r, const unsigned char *a) -{ - int i; - - for(i=0;i<MLKEM_N/2;i++){ - r->coeffs[2*i] = a[3*i] | ((uint16_t)a[3*i+1] & 0x0f) << 8; - r->coeffs[2*i+1] = a[3*i+1] >> 4 | ((uint16_t)a[3*i+2] & 0xff) << 4; - } -} - -/************************************************* -* Name: poly_getnoise -* -* Description: Sample a polynomial deterministically from a seed and a nonce, -* with output polynomial close to centered binomial distribution -* with parameter MLKEM_ETA -* -* Arguments: - poly *r: pointer to output polynomial -* - const unsigned char *seed: pointer to input seed (pointing to array of length MLKEM_SYMBYTES bytes) -* - unsigned char nonce: one-byte input nonce -**************************************************/ -void poly_getnoise(poly *r, const unsigned char *seed, unsigned char nonce) -{ - unsigned char buf[MLKEM_ETA*MLKEM_N/4]; - - prf(buf, MLKEM_ETA*MLKEM_N/4, seed, nonce); - cbd(r, buf); -} - -/************************************************* -* Name: poly_ntt -* -* Description: Computes negacyclic number-theoretic transform (NTT) of -* a polynomial in place; -* inputs assumed to be in normal order, output in bitreversed order -* -* Arguments: - uint16_t *r: pointer to in/output polynomial -**************************************************/ -void poly_ntt(poly *r) -{ - ntt(r->coeffs); - poly_reduce(r); -} - -/************************************************* -* Name: poly_invntt -* -* Description: Computes inverse of negacyclic number-theoretic transform (NTT) of -* a polynomial in place; -* inputs assumed to be in bitreversed order, output in normal order -* -* Arguments: - uint16_t *a: pointer to in/output polynomial -**************************************************/ -void poly_invntt(poly *r) -{ - invntt(r->coeffs); -} - -/************************************************* -* Name: poly_basemul -* -* Description: Multiplication of two polynomials in NTT domain -* -* Arguments: - poly *r: pointer to output polynomial -* - const poly *a: pointer to first input polynomial -* - const poly *b: pointer to second input polynomial -**************************************************/ -void poly_basemul(poly *r, const poly *a, const poly *b) -{ - unsigned int i; - - for(i = 0; i < MLKEM_N/4; ++i) { - basemul(r->coeffs + 4*i, a->coeffs + 4*i, b->coeffs + 4*i, zetas[64 + i]); - basemul(r->coeffs + 4*i + 2, a->coeffs + 4*i + 2, b->coeffs + 4*i + 2, -zetas[64 + i]); - } -} - -/************************************************* -* Name: poly_frommont -* -* Description: Inplace conversion of all coefficients of a polynomial -* from Montgomery domain to normal domain -* -* Arguments: - poly *r: pointer to input/output polynomial -**************************************************/ -void poly_frommont(poly *r) -{ - int i; - const int16_t f = (1ULL << 32) % MLKEM_Q; - - for(i=0;i<MLKEM_N;i++) - r->coeffs[i] = montgomery_reduce((int32_t)r->coeffs[i]*f); -} - -/************************************************* -* Name: poly_reduce -* -* Description: Applies Barrett reduction to all coefficients of a polynomial -* for details of the Barrett reduction see comments in reduce.c -* -* Arguments: - poly *r: pointer to input/output polynomial -**************************************************/ -void poly_reduce(poly *r) -{ - int i; - - for(i=0;i<MLKEM_N;i++) - r->coeffs[i] = barrett_reduce(r->coeffs[i]); -} - -/************************************************* -* Name: poly_csubq -* -* Description: Applies conditional subtraction of q to each coefficient of a polynomial -* for details of conditional subtraction of q see comments in reduce.c -* -* Arguments: - poly *r: pointer to input/output polynomial -**************************************************/ -void poly_csubq(poly *r) -{ - int i; - - for(i=0;i<MLKEM_N;i++) - r->coeffs[i] = csubq(r->coeffs[i]); -} - -/************************************************* -* Name: poly_add -* -* Description: Add two polynomials -* -* Arguments: - poly *r: pointer to output polynomial -* - const poly *a: pointer to first input polynomial -* - const poly *b: pointer to second input polynomial -**************************************************/ -void poly_add(poly *r, const poly *a, const poly *b) -{ - int i; - for(i=0;i<MLKEM_N;i++) - r->coeffs[i] = a->coeffs[i] + b->coeffs[i]; -} - -/************************************************* -* Name: poly_sub -* -* Description: Subtract two polynomials -* -* Arguments: - poly *r: pointer to output polynomial -* - const poly *a: pointer to first input polynomial -* - const poly *b: pointer to second input polynomial -**************************************************/ -void poly_sub(poly *r, const poly *a, const poly *b) -{ - int i; - for(i=0;i<MLKEM_N;i++) - r->coeffs[i] = a->coeffs[i] - b->coeffs[i]; -} - -/************************************************* -* Name: poly_frommsg -* -* Description: Convert 32-byte message to polynomial -* -* Arguments: - poly *r: pointer to output polynomial -* - const unsigned char *msg: pointer to input message -**************************************************/ -void poly_frommsg(poly *r, const unsigned char msg[MLKEM_SYMBYTES]) -{ - int i,j; - uint16_t mask; - - for(i=0;i<MLKEM_SYMBYTES;i++) - { - for(j=0;j<8;j++) - { - mask = -((msg[i] >> j)&1); - r->coeffs[8*i+j] = mask & ((MLKEM_Q+1)/2); - } - } -} - -/************************************************* -* Name: poly_tomsg -* -* Description: Convert polynomial to 32-byte message -* -* Arguments: - unsigned char *msg: pointer to output message -* - const poly *a: pointer to input polynomial -**************************************************/ -void poly_tomsg(unsigned char msg[MLKEM_SYMBYTES], poly *a) -{ - uint16_t t; - int i,j; - - poly_csubq(a); - - for(i=0;i<MLKEM_SYMBYTES;i++) - { - msg[i] = 0; - for(j=0;j<8;j++) - { - t = (((a->coeffs[8*i+j] << 1) + MLKEM_Q/2) / MLKEM_Q) & 1; - msg[i] |= t << j; - } - } -} diff --git a/code/jasmin/mlkem_ref/poly.h b/code/jasmin/mlkem_ref/poly.h deleted file mode 100644 index 99613f4c..00000000 --- a/code/jasmin/mlkem_ref/poly.h +++ /dev/null @@ -1,66 +0,0 @@ -#ifndef POLY_H -#define POLY_H - -#include <stdint.h> -#include "params.h" - -/* - * Elements of R_q = Z_q[X]/(X^n + 1). Represents polynomial - * coeffs[0] + X*coeffs[1] + X^2*xoeffs[2] + ... + X^{n-1}*coeffs[n-1] - */ -typedef struct{ - int16_t coeffs[MLKEM_N]; -} poly; - -void poly_compress(unsigned char *r, poly *a); -void poly_decompress(poly *r, const unsigned char *a); - -void poly_tobytes(unsigned char *r, poly *a); -void poly_frombytes(poly *r, const unsigned char *a); - -void poly_frommsg(poly *r, const unsigned char msg[MLKEM_SYMBYTES]); -void poly_tomsg(unsigned char msg[MLKEM_SYMBYTES], poly *r); - -void poly_getnoise(poly *r,const unsigned char *seed, unsigned char nonce); - -void poly_ntt(poly *r); -void poly_invntt(poly *r); -void poly_basemul(poly *r, const poly *a, const poly *b); -void poly_frommont(poly *r); - -void poly_reduce(poly *r); -void poly_csubq(poly *r); - -void poly_add(poly *r, const poly *a, const poly *b); -void poly_sub(poly *r, const poly *a, const poly *b); - - - - - - -void poly_compress_jazz(unsigned char *r, poly *a); -void poly_decompress_jazz(poly *r, const unsigned char *a); - -void poly_tobytes_jazz(unsigned char *r, poly *a); -void poly_frombytes_jazz(poly *r, const unsigned char *a); - -void poly_frommsg_jazz(poly *r, const unsigned char msg[MLKEM_SYMBYTES]); -void poly_tomsg_jazz(unsigned char msg[MLKEM_SYMBYTES], poly *r); - - -void poly_getnoise_jazz(poly *r,const unsigned char *seed, unsigned char nonce); - -void poly_ntt_jazz(poly *r); -void poly_invntt_jazz(poly *r); -void poly_basemul_jazz(poly *r, const poly *a, const poly *b); -void poly_frommont_jazz(poly *r); - -void poly_reduce_jazz(poly *r); -void poly_csubq_jazz(poly *r); - -void poly_add2_jazz(poly *r, const poly *b); -void poly_sub_jazz(poly *r, const poly *a, const poly *b); - - -#endif diff --git a/code/jasmin/mlkem_ref/poly_ntt.c b/code/jasmin/mlkem_ref/poly_ntt.c deleted file mode 100644 index 83341812..00000000 --- a/code/jasmin/mlkem_ref/poly_ntt.c +++ /dev/null @@ -1,10 +0,0 @@ -#include "poly.h" -#include "ntt.h" - -void poly_ntt_jazz(poly *r, int16_t *zetas) -{ - ntt(r->coeffs); - poly_reduce(r); -} - - diff --git a/code/jasmin/mlkem_ref/polyvec.c b/code/jasmin/mlkem_ref/polyvec.c deleted file mode 100644 index 53835e84..00000000 --- a/code/jasmin/mlkem_ref/polyvec.c +++ /dev/null @@ -1,237 +0,0 @@ -#include <stdint.h> -#include "polyvec.h" -#include "poly.h" - -/************************************************* -* Name: polyvec_compress -* -* Description: Compress and serialize vector of polynomials -* -* Arguments: - unsigned char *r: pointer to output byte array (needs space for MLKEM_POLYVECCOMPRESSEDBYTES) -* - const polyvec *a: pointer to input vector of polynomials -**************************************************/ -void polyvec_compress(unsigned char *r, polyvec *a) -{ - int i,j,k; - - polyvec_csubq(a); - -#if (MLKEM_POLYVECCOMPRESSEDBYTES == (MLKEM_K * 352)) - uint16_t t[8]; - for(i=0;i<MLKEM_K;i++) - { - for(j=0;j<MLKEM_N/8;j++) - { - for(k=0;k<8;k++) - t[k] = ((((uint32_t)a->vec[i].coeffs[8*j+k] << 11) + MLKEM_Q/2) / MLKEM_Q) & 0x7ff; - - r[11*j+ 0] = t[0] & 0xff; - r[11*j+ 1] = (t[0] >> 8) | ((t[1] & 0x1f) << 3); - r[11*j+ 2] = (t[1] >> 5) | ((t[2] & 0x03) << 6); - r[11*j+ 3] = (t[2] >> 2) & 0xff; - r[11*j+ 4] = (t[2] >> 10) | ((t[3] & 0x7f) << 1); - r[11*j+ 5] = (t[3] >> 7) | ((t[4] & 0x0f) << 4); - r[11*j+ 6] = (t[4] >> 4) | ((t[5] & 0x01) << 7); - r[11*j+ 7] = (t[5] >> 1) & 0xff; - r[11*j+ 8] = (t[5] >> 9) | ((t[6] & 0x3f) << 2); - r[11*j+ 9] = (t[6] >> 6) | ((t[7] & 0x07) << 5); - r[11*j+10] = (t[7] >> 3); - } - r += 352; - } -#elif (MLKEM_POLYVECCOMPRESSEDBYTES == (MLKEM_K * 320)) - uint16_t t[4]; - for(i=0;i<MLKEM_K;i++) - { - for(j=0;j<MLKEM_N/4;j++) - { - for(k=0;k<4;k++) - t[k] = ((((uint32_t)a->vec[i].coeffs[4*j+k] << 10) + MLKEM_Q/2) / MLKEM_Q) & 0x3ff; - - r[5*j+ 0] = t[0] & 0xff; - r[5*j+ 1] = (t[0] >> 8) | ((t[1] & 0x3f) << 2); - r[5*j+ 2] = (t[1] >> 6) | ((t[2] & 0x0f) << 4); - r[5*j+ 3] = (t[2] >> 4) | ((t[3] & 0x03) << 6); - r[5*j+ 4] = (t[3] >> 2); - } - r += 320; - } -#else -#error "MLKEM_POLYVECCOMPRESSEDBYTES needs to be in {320*MLKEM_K, 352*MLKEM_K}" -#endif -} - -/************************************************* -* Name: polyvec_decompress -* -* Description: De-serialize and decompress vector of polynomials; -* approximate inverse of polyvec_compress -* -* Arguments: - polyvec *r: pointer to output vector of polynomials -* - unsigned char *a: pointer to input byte array (of length MLKEM_POLYVECCOMPRESSEDBYTES) -**************************************************/ -void polyvec_decompress(polyvec *r, const unsigned char *a) -{ - int i,j; -#if (MLKEM_POLYVECCOMPRESSEDBYTES == (MLKEM_K * 352)) - for(i=0;i<MLKEM_K;i++) - { - for(j=0;j<MLKEM_N/8;j++) - { - r->vec[i].coeffs[8*j+0] = (((a[11*j+ 0] | (((uint32_t)a[11*j+ 1] & 0x07) << 8)) * MLKEM_Q) + 1024) >> 11; - r->vec[i].coeffs[8*j+1] = ((((a[11*j+ 1] >> 3) | (((uint32_t)a[11*j+ 2] & 0x3f) << 5)) * MLKEM_Q) + 1024) >> 11; - r->vec[i].coeffs[8*j+2] = ((((a[11*j+ 2] >> 6) | (((uint32_t)a[11*j+ 3] & 0xff) << 2) | (((uint32_t)a[11*j+ 4] & 0x01) << 10)) * MLKEM_Q) + 1024) >> 11; - r->vec[i].coeffs[8*j+3] = ((((a[11*j+ 4] >> 1) | (((uint32_t)a[11*j+ 5] & 0x0f) << 7)) * MLKEM_Q) + 1024) >> 11; - r->vec[i].coeffs[8*j+4] = ((((a[11*j+ 5] >> 4) | (((uint32_t)a[11*j+ 6] & 0x7f) << 4)) * MLKEM_Q) + 1024) >> 11; - r->vec[i].coeffs[8*j+5] = ((((a[11*j+ 6] >> 7) | (((uint32_t)a[11*j+ 7] & 0xff) << 1) | (((uint32_t)a[11*j+ 8] & 0x03) << 9)) * MLKEM_Q) + 1024) >> 11; - r->vec[i].coeffs[8*j+6] = ((((a[11*j+ 8] >> 2) | (((uint32_t)a[11*j+ 9] & 0x1f) << 6)) * MLKEM_Q) + 1024) >> 11; - r->vec[i].coeffs[8*j+7] = ((((a[11*j+ 9] >> 5) | (((uint32_t)a[11*j+10] & 0xff) << 3)) * MLKEM_Q) + 1024) >> 11; - } - a += 352; - } -#elif (MLKEM_POLYVECCOMPRESSEDBYTES == (MLKEM_K * 320)) - for(i=0;i<MLKEM_K;i++) - { - for(j=0;j<MLKEM_N/4;j++) - { - r->vec[i].coeffs[4*j+0] = (((a[5*j+ 0] | (((uint32_t)a[5*j+ 1] & 0x03) << 8)) * MLKEM_Q) + 512) >> 10; - r->vec[i].coeffs[4*j+1] = ((((a[5*j+ 1] >> 2) | (((uint32_t)a[5*j+ 2] & 0x0f) << 6)) * MLKEM_Q) + 512) >> 10; - r->vec[i].coeffs[4*j+2] = ((((a[5*j+ 2] >> 4) | (((uint32_t)a[5*j+ 3] & 0x3f) << 4)) * MLKEM_Q) + 512) >> 10; - r->vec[i].coeffs[4*j+3] = ((((a[5*j+ 3] >> 6) | (((uint32_t)a[5*j+ 4] & 0xff) << 2)) * MLKEM_Q) + 512) >> 10; - } - a += 320; - } -#else -#error "MLKEM_POLYVECCOMPRESSEDBYTES needs to be in {320*MLKEM_K, 352*MLKEM_K}" -#endif -} - -/************************************************* -* Name: polyvec_tobytes -* -* Description: Serialize vector of polynomials -* -* Arguments: - unsigned char *r: pointer to output byte array (needs space for MLKEM_POLYVECBYTES) -* - const polyvec *a: pointer to input vector of polynomials -**************************************************/ -void polyvec_tobytes(unsigned char *r, polyvec *a) -{ - int i; - for(i=0;i<MLKEM_K;i++) - poly_tobytes(r+i*MLKEM_POLYBYTES, &a->vec[i]); -} - -/************************************************* -* Name: polyvec_frombytes -* -* Description: De-serialize vector of polynomials; -* inverse of polyvec_tobytes -* -* Arguments: - unsigned char *r: pointer to output byte array -* - const polyvec *a: pointer to input vector of polynomials (of length MLKEM_POLYVECBYTES) -**************************************************/ -void polyvec_frombytes(polyvec *r, const unsigned char *a) -{ - int i; - for(i=0;i<MLKEM_K;i++) - poly_frombytes(&r->vec[i], a+i*MLKEM_POLYBYTES); -} - -/************************************************* -* Name: polyvec_ntt -* -* Description: Apply forward NTT to all elements of a vector of polynomials -* -* Arguments: - polyvec *r: pointer to in/output vector of polynomials -**************************************************/ -void polyvec_ntt(polyvec *r) -{ - int i; - for(i=0;i<MLKEM_K;i++) - poly_ntt(&r->vec[i]); -} - -/************************************************* -* Name: polyvec_invntt -* -* Description: Apply inverse NTT to all elements of a vector of polynomials -* -* Arguments: - polyvec *r: pointer to in/output vector of polynomials -**************************************************/ -void polyvec_invntt(polyvec *r) -{ - int i; - for(i=0;i<MLKEM_K;i++) - poly_invntt(&r->vec[i]); -} - -/************************************************* -* Name: polyvec_pointwise_acc -* -* Description: Pointwise multiply elements of a and b and accumulate into r -* -* Arguments: - poly *r: pointer to output polynomial -* - const polyvec *a: pointer to first input vector of polynomials -* - const polyvec *b: pointer to second input vector of polynomials -**************************************************/ -void polyvec_pointwise_acc(poly *r, const polyvec *a, const polyvec *b) -{ - int i; - poly t; - - poly_basemul(r, &a->vec[0], &b->vec[0]); - for(i=1;i<MLKEM_K;i++) { - poly_basemul(&t, &a->vec[i], &b->vec[i]); - poly_add(r, r, &t); - } - - poly_reduce(r); -} - -/************************************************* -* Name: polyvec_reduce -* -* Description: Applies Barrett reduction to each coefficient -* of each element of a vector of polynomials -* for details of the Barrett reduction see comments in reduce.c -* -* Arguments: - poly *r: pointer to input/output polynomial -**************************************************/ -void polyvec_reduce(polyvec *r) -{ - int i; - for(i=0;i<MLKEM_K;i++) - poly_reduce(&r->vec[i]); -} - -/************************************************* -* Name: polyvec_csubq -* -* Description: Applies conditional subtraction of q to each coefficient -* of each element of a vector of polynomials -* for details of conditional subtraction of q see comments in reduce.c -* -* Arguments: - poly *r: pointer to input/output polynomial -**************************************************/ -void polyvec_csubq(polyvec *r) -{ - int i; - for(i=0;i<MLKEM_K;i++) - poly_csubq(&r->vec[i]); -} - -/************************************************* -* Name: polyvec_add -* -* Description: Add vectors of polynomials -* -* Arguments: - polyvec *r: pointer to output vector of polynomials -* - const polyvec *a: pointer to first input vector of polynomials -* - const polyvec *b: pointer to second input vector of polynomials -**************************************************/ -void polyvec_add(polyvec *r, const polyvec *a, const polyvec *b) -{ - int i; - for(i=0;i<MLKEM_K;i++) - poly_add(&r->vec[i], &a->vec[i], &b->vec[i]); -} diff --git a/code/jasmin/mlkem_ref/polyvec.h b/code/jasmin/mlkem_ref/polyvec.h deleted file mode 100644 index 78aaa397..00000000 --- a/code/jasmin/mlkem_ref/polyvec.h +++ /dev/null @@ -1,47 +0,0 @@ -#ifndef POLYVEC_H -#define POLYVEC_H - -#include "params.h" -#include "poly.h" - -typedef struct{ - poly vec[MLKEM_K]; -} polyvec; - -void polyvec_compress(unsigned char *r, polyvec *a); -void polyvec_decompress(polyvec *r, const unsigned char *a); - -void polyvec_tobytes(unsigned char *r, polyvec *a); -void polyvec_frombytes(polyvec *r, const unsigned char *a); - -void polyvec_ntt(polyvec *r); -void polyvec_invntt(polyvec *r); - -void polyvec_pointwise_acc(poly *r, const polyvec *a, const polyvec *b); - -void polyvec_reduce(polyvec *r); -void polyvec_csubq(polyvec *r); - -void polyvec_add(polyvec *r, const polyvec *a, const polyvec *b); - - - - -void polyvec_compress_jazz(unsigned char *r, polyvec *a); -void polyvec_decompress_jazz(polyvec *r, const unsigned char *a); - -void polyvec_tobytes_jazz(unsigned char *r, polyvec *a); -void polyvec_frombytes_jazz(polyvec *r, const unsigned char *a); - -void polyvec_ntt_jazz(polyvec *r); -void polyvec_invntt_jazz(polyvec *r); - -void polyvec_pointwise_acc_jazz(poly *r, const polyvec *a, const polyvec *b); - -void polyvec_reduce_jazz(polyvec *r); -void polyvec_csubq_jazz(polyvec *r); - -void polyvec_add2_jazz(polyvec *r, const polyvec *b); - - -#endif diff --git a/code/jasmin/mlkem_ref/reduce.c b/code/jasmin/mlkem_ref/reduce.c deleted file mode 100644 index 8d5a6997..00000000 --- a/code/jasmin/mlkem_ref/reduce.c +++ /dev/null @@ -1,62 +0,0 @@ -#include <stdint.h> -#include "params.h" -#include "reduce.h" - -/************************************************* -* Name: montgomery_reduce -* -* Description: Montgomery reduction; given a 32-bit integer a, computes -* 16-bit integer congruent to a * R^-1 mod q, -* where R=2^16 -* -* Arguments: - int32_t a: input integer to be reduced; has to be in {-q2^15,...,q2^15-1} -* -* Returns: integer in {-q+1,...,q-1} congruent to a * R^-1 modulo q. -**************************************************/ -int16_t montgomery_reduce(int32_t a) -{ - int32_t t; - int16_t u; - -// printf("a: %d\n", a); - u = a * QINV; - t = (int32_t)u * MLKEM_Q; - t = a - t; - t >>= 16; - return t; -} - -/************************************************* -* Name: barrett_reduce -* -* Description: Barrett reduction; given a 16-bit integer a, computes -* 16-bit integer congruent to a mod q in {0,...,q} -* -* Arguments: - int16_t a: input integer to be reduced -* -* Returns: integer in {0,...,q} congruent to a modulo q. -**************************************************/ -int16_t barrett_reduce(int16_t a) { - int32_t t; - const int32_t v = (1U << 26)/MLKEM_Q + 1; - - t = v*a; - t >>= 26; - t *= MLKEM_Q; - return a - t; -} - -/************************************************* -* Name: csubq -* -* Description: Conditionallly subtract q -* -* Arguments: - int16_t x: input integer -* -* Returns: a - q if a >= q, else a -**************************************************/ -int16_t csubq(int16_t a) { - a -= MLKEM_Q; - a += (a >> 15) & MLKEM_Q; - return a; -} diff --git a/code/jasmin/mlkem_ref/reduce.h b/code/jasmin/mlkem_ref/reduce.h deleted file mode 100644 index 59ee6ef4..00000000 --- a/code/jasmin/mlkem_ref/reduce.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef REDUCE_H -#define REDUCE_H - -#include <stdint.h> - -#define MONT 2285 // 2^16 % Q -#define QINV 62209 // q^(-1) mod 2^16 - -int16_t montgomery_reduce(int32_t a); - -int16_t barrett_reduce(int16_t a); - -int16_t csubq(int16_t x); - -#endif diff --git a/code/jasmin/mlkem_ref/reduce.jinc b/code/jasmin/mlkem_ref/reduce.jinc index fd29cc3a..2006a707 100644 --- a/code/jasmin/mlkem_ref/reduce.jinc +++ b/code/jasmin/mlkem_ref/reduce.jinc @@ -63,7 +63,6 @@ fn __barrett_reduce(reg u16 a) -> reg u16 //t = #SAR_32(t, 26); t >>s= 26; t *= MLKEM_Q; - r = t; r = a; r -= t; return r; diff --git a/code/jasmin/mlkem_ref/symmetric-fips202.c b/code/jasmin/mlkem_ref/symmetric-fips202.c deleted file mode 100644 index b8f64a41..00000000 --- a/code/jasmin/mlkem_ref/symmetric-fips202.c +++ /dev/null @@ -1,77 +0,0 @@ -#include <stdlib.h> -#include "symmetric.h" -#include "fips202.h" - -/************************************************* -* Name: mlkem_shake128_absorb -* -* Description: Absorb step of the SHAKE128 specialized for the Kyber context. -* -* Arguments: - uint64_t *s: pointer to (uninitialized) output Keccak state -* - const unsigned char *input: pointer to MLKEM_SYMBYTES input to be absorbed into s -* - unsigned char i additional byte of input -* - unsigned char j additional byte of input -**************************************************/ -void mlkem_shake128_absorb(keccak_state *s, const unsigned char *input, unsigned char x, unsigned char y) -{ - unsigned char extseed[MLKEM_SYMBYTES+2]; - int i; - - for(i=0;i<MLKEM_SYMBYTES;i++) - extseed[i] = input[i]; - extseed[i++] = x; - extseed[i] = y; - shake128_absorb(s->s, extseed, MLKEM_SYMBYTES+2); -} - -/************************************************* -* Name: mlkem_shake128_squeezeblocks -* -* Description: Squeeze step of SHAKE128 XOF. Squeezes full blocks of SHAKE128_RATE bytes each. -* Modifies the state. Can be called multiple times to keep squeezing, -* i.e., is incremental. -* -* Arguments: - unsigned char *output: pointer to output blocks -* - unsigned long long nblocks: number of blocks to be squeezed (written to output) -* - keccak_state *s: pointer to in/output Keccak state -**************************************************/ -void mlkem_shake128_squeezeblocks(unsigned char *output, unsigned long long nblocks, keccak_state *s) -{ - shake128_squeezeblocks(output, nblocks, s->s); -} - -/************************************************* -* Name: shake256_prf -* -* Description: Usage of SHAKE256 as a PRF, concatenates secret and public input -* and then generates outlen bytes of SHAKE256 output -* -* Arguments: - unsigned char *output: pointer to output -* - unsigned long long outlen: number of requested output bytes -* - const unsigned char * key: pointer to the key (of length MLKEM_SYMBYTES) -* - const unsigned char nonce: single-byte nonce (public PRF input) -**************************************************/ -void shake256_prf(unsigned char *output, unsigned long long outlen, const unsigned char *key, const unsigned char nonce) -{ - unsigned char extkey[MLKEM_SYMBYTES+1]; - size_t i; - - for(i=0;i<MLKEM_SYMBYTES;i++) - extkey[i] = key[i]; - extkey[i] = nonce; - - shake256(output, outlen, extkey, MLKEM_SYMBYTES+1); -} - -void shake256_rkprf(unsigned char *out, const unsigned char *key, const unsigned char *input) -{ - unsigned char extkey[MLKEM_SYMBYTES+MLKEM_CIPHERTEXTBYTES]; - size_t i; - - for(i=0;i<MLKEM_SYMBYTES;i++) - extkey[i] = key[i]; - for(i=0;i<MLKEM_CIPHERTEXTBYTES;i++) - extkey[i+MLKEM_SYMBYTES] = input[i]; - - shake256(out, MLKEM_SYMBYTES, extkey, MLKEM_SYMBYTES+MLKEM_CIPHERTEXTBYTES); -} \ No newline at end of file diff --git a/code/jasmin/mlkem_ref/symmetric.h b/code/jasmin/mlkem_ref/symmetric.h deleted file mode 100644 index b609d9bb..00000000 --- a/code/jasmin/mlkem_ref/symmetric.h +++ /dev/null @@ -1,52 +0,0 @@ -#ifndef SYMMETRIC_H -#define SYMMETRIC_H - -#include "params.h" - -#ifdef MLKEM_90S - -#include "aes256ctr.h" -#include "sha2.h" - -#if (MLKEM_SSBYTES != 32) -#error "90s variant of Kyber can only generate keys of length 256 bits" -#endif - -#define hash_h(OUT, IN, INBYTES) sha256(OUT, IN, INBYTES) -#define hash_g(OUT, IN, INBYTES) sha512(OUT, IN, INBYTES) -#define xof_absorb(STATE, IN, X, Y) aes256xof_absorb(STATE, IN, X, Y) -#define xof_squeezeblocks(OUT, OUTBLOCKS, STATE) aes256xof_squeezeblocks(OUT, OUTBLOCKS, STATE) -#define prf(OUT, OUTBYTES, KEY, NONCE) aes256_prf(OUT, OUTBYTES, KEY, NONCE) -#define kdf(OUT, IN, INBYTES) sha256(OUT, IN, INBYTES) - -#define XOF_BLOCKBYTES 64 - -typedef aes256xof_ctx xof_state; - -#else - -#include "fips202.h" - -typedef struct { - uint64_t s[25]; -} keccak_state; - -void mlkem_shake128_absorb(keccak_state *s, const unsigned char *input, unsigned char x, unsigned char y); -void mlkem_shake128_squeezeblocks(unsigned char *output, unsigned long long nblocks, keccak_state *s); -void shake256_prf(unsigned char *output, unsigned long long outlen, const unsigned char *key, const unsigned char nonce); -void shake256_rkprf(unsigned char *out, const unsigned char *key, const unsigned char *input); - -#define hash_h(OUT, IN, INBYTES) sha3_256(OUT, IN, INBYTES) -#define hash_g(OUT, IN, INBYTES) sha3_512(OUT, IN, INBYTES) -#define xof_absorb(STATE, IN, X, Y) mlkem_shake128_absorb(STATE, IN, X, Y) -#define xof_squeezeblocks(OUT, OUTBLOCKS, STATE) mlkem_shake128_squeezeblocks(OUT, OUTBLOCKS, STATE) -#define prf(OUT, OUTBYTES, KEY, NONCE) shake256_prf(OUT, OUTBYTES, KEY, NONCE) -#define rkprf(OUT, KEY, INPUT) shake256_rkprf(OUT, KEY, INPUT) - -#define XOF_BLOCKBYTES 168 - -typedef keccak_state xof_state; - -#endif /* MLKEM_90S */ - -#endif /* SYMMETRIC_H */ diff --git a/code/kyber b/code/kyber new file mode 160000 index 00000000..10b478fc --- /dev/null +++ b/code/kyber @@ -0,0 +1 @@ +Subproject commit 10b478fc3cc4ff6215eb0b6a11bd758bf0929cbd