diff --git a/code/jasmin/avx2v/Makefile b/code/jasmin/avx2v/Makefile deleted file mode 100644 index 1d6ac2ed..00000000 --- a/code/jasmin/avx2v/Makefile +++ /dev/null @@ -1,138 +0,0 @@ -# -*- Makefile -*- - -CC ?= /usr/bin/gcc -GFLAGS ?= -CFLAGS := -Wall -Wextra -g -Ofast -fomit-frame-pointer -JFLAGS := -lea ${JADDFLAGS} -OS := $(shell uname -s) - -.SECONDARY: jpoly.s jpolyvec.s jfips202.s jindcpa.s jindcpa.o jkem.s - -default: test speed - -test: test/test_poly_compress \ - test/test_poly_decompress \ - test/test_poly_tobytes \ - test/test_poly_frombytes \ - test/test_poly_tomsg \ - test/test_poly_frommsg \ - test/test_poly_add2 \ - test/test_poly_sub \ - test/test_poly_ntt \ - test/test_poly_invntt \ - test/test_poly_basemul \ - test/test_poly_frommont \ - test/test_poly_reduce \ - test/test_poly_csubq \ - test/test_poly_getnoise \ - test/test_polyvec_compress\ - test/test_polyvec_decompress\ - test/test_polyvec_tobytes \ - test/test_polyvec_frombytes \ - test/test_polyvec_add2 \ - test/test_polyvec_ntt \ - test/test_polyvec_invntt \ - test/test_polyvec_pointwise_acc \ - test/test_polyvec_reduce\ - test/test_polyvec_csubq \ - test/test_fips202 \ - test/test_indcpa \ - test/test_kem - -speed: test/speed_indcpa \ - test/speed_kyber - -HEADERS = params.h poly.h fips202.h ntt.h indcpa.h kem.h \ - -JHEADERS = params.jinc \ - reduce.jinc \ - fips202_common.jinc \ - fips202.jinc \ - fips202_4x.jinc \ - keccakf1600.jinc \ - consts.jinc \ - shuffle.jinc \ - indcpa.jinc \ - verify.jinc - -POLYHEADERS = poly.jinc \ - consts.jinc \ - -POLYVECHEADERS = polyvec.jinc \ - gen_matrix.jinc \ - -INCS = fq.inc shuffle.inc -SOURCES = poly.c polyvec.c cbd.c fips202.c ntt.c reduce.c symmetric-fips202.c indcpa.c kem.c consts.c shuffle.S fq.S\ - -test/test_indcpa: test/test_indcpa.c $(HEADERS) $(SOURCES) $(INCS) jindcpa.o - $(CC) $(CFLAGS) -o $@ $(SOURCES) jindcpa.o $< - -test/test_kem: test/test_kem.c $(HEADERS) $(SOURCES) $(INCS) jkem.o - $(CC) $(CFLAGS) -o $@ $(SOURCES) jkem.o ~/Desktop/Repos/jasmin/compiler/syscall/jasmin_syscall.o $< - -test/speed_indcpa: test/speed_indcpa.c $(HEADERS) $(SOURCES) $(INCS) jindcpa.o - $(CC) $(CFLAGS) -o $@ $(SOURCES) jindcpa.o $< - -test/speed_kyber: test/speed_kyber.c $(HEADERS) $(SOURCES) $(INCS) jspeed.s - $(CC) $(CFLAGS) -o $@ $(SOURCES) jspeed.s $< - -test/test_fips202: test/test_fips202.c $(HEADERS) fips202.c jfips202.s - $(CC) $(CFLAGS) -o $@ fips202.c jfips202.s $< - -test/test_gen_matrix: test/test_gen_matrix.c $(HEADERS) gen_matrix.s - $(CC) $(CFLAGS) -o $@ gen_matrix.s $< - -test/test_poly_%: test/test_poly_%.c $(HEADERS) $(SOURCES) $(INCS) jpoly.s - $(CC) $(CFLAGS) -o $@ $(SOURCES) jpoly.s $< - -test/test_polyvec_%: test/test_polyvec_%.c $(HEADERS) $(SOURCES) $(INCS) jpolyvec.s - $(CC) $(CFLAGS) -o $@ $(SOURCES) jpolyvec.s $< - -%.s: %.jazz - $(JASMIN) -o $@ $(JFLAGS) $^ - -.PHONY: clean - -clean: - -rm -f *.o - -rm -f gen_matrix.s - -rm -f jindcpa.s - -rm -f jkem.s - -rm -f jfips202.s - -rm -f jpoly.s - -rm -f jpolyvec.s - -rm -f jspeed.s - -rm -f test/test_poly_compress - -rm -f test/test_poly_decompress - -rm -f test/test_poly_tobytes - -rm -f test/test_poly_frombytes - -rm -f test/test_poly_tomsg - -rm -f test/test_poly_frommsg - -rm -f test/test_poly_add2 - -rm -f test/test_poly_sub - -rm -f test/test_poly_ntt - -rm -f test/test_poly_invntt - -rm -f test/test_poly_basemul - -rm -f test/test_poly_frommont - -rm -f test/test_poly_reduce - -rm -f test/test_poly_csubq - -rm -f test/test_poly_getnoise - -rm -f test/test_polyvec_compress - -rm -f test/test_polyvec_decompress - -rm -f test/test_polyvec_tobytes - -rm -f test/test_polyvec_frombytes - -rm -f test/test_polyvec_add2 - -rm -f test/test_polyvec_ntt - -rm -f test/test_polyvec_invntt - -rm -f test/test_polyvec_pointwise_acc - -rm -f test/test_polyvec_reduce - -rm -f test/test_polyvec_csubq - -rm -f test/test_fips202 - -rm -f test/test_gen_matrix - -rm -f test/test_indcpa - -rm -f test/test_kem - -rm -f test/speed_indcpa - -rm -f test/speed_kyber -ifeq ($(OS),Darwin) - -rm -r -f test/*.dSYM -endif diff --git a/code/jasmin/avx2v/cbd.c b/code/jasmin/avx2v/cbd.c deleted file mode 100644 index ddcd7860..00000000 --- a/code/jasmin/avx2v/cbd.c +++ /dev/null @@ -1,128 +0,0 @@ -#include -#include "params.h" -#include "cbd.h" - -/************************************************* -* Name: load32_littleendian -* -* Description: load bytes into a 32-bit integer -* in little-endian order -* -* Arguments: - const unsigned char *x: pointer to input byte array -* -* Returns 32-bit unsigned integer loaded from x -**************************************************/ -static uint32_t load32_littleendian(const unsigned char *x) -{ - uint32_t r; - r = (uint32_t)x[0]; - r |= (uint32_t)x[1] << 8; - r |= (uint32_t)x[2] << 16; - r |= (uint32_t)x[3] << 24; - return r; -} - -/************************************************* -* Name: load24_littleendian -* -* Description: load 3 bytes into a 32-bit integer -* in little-endian order. -* This function is only needed for Kyber-512 -* -* Arguments: - const uint8_t *x: pointer to input byte array -* -* Returns 32-bit unsigned integer loaded from x (most significant byte is zero) -**************************************************/ -#if KYBER_ETA1 == 3 -static uint32_t load24_littleendian(const uint8_t x[3]) -{ - uint32_t r; - r = (uint32_t)x[0]; - r |= (uint32_t)x[1] << 8; - r |= (uint32_t)x[2] << 16; - return r; -} -#endif - - -/************************************************* -* Name: cbd2 -* -* Description: Given an array of uniformly random bytes, compute -* polynomial with coefficients distributed according to -* a centered binomial distribution with parameter eta=2 -* -* Arguments: - poly *r: pointer to output polynomial -* - const uint8_t *buf: pointer to input byte array -**************************************************/ -static void cbd2(poly *r, const uint8_t buf[2*KYBER_N/4]) -{ - unsigned int i,j; - uint32_t t,d; - int16_t a,b; - - for(i=0;i>1) & 0x55555555; - - for(j=0;j<8;j++) { - a = (d >> (4*j+0)) & 0x3; - b = (d >> (4*j+2)) & 0x3; - r->coeffs[8*i+j] = a - b; - } - } -} - -/************************************************* -* Name: cbd3 -* -* Description: Given an array of uniformly random bytes, compute -* polynomial with coefficients distributed according to -* a centered binomial distribution with parameter eta=3. -* This function is only needed for Kyber-512 -* -* Arguments: - poly *r: pointer to output polynomial -* - const uint8_t *buf: pointer to input byte array -**************************************************/ -#if KYBER_ETA1 == 3 -static void cbd3(poly *r, const uint8_t buf[3*KYBER_N/4]) -{ - unsigned int i,j; - uint32_t t,d; - int16_t a,b; - - for(i=0;i>1) & 0x00249249; - d += (t>>2) & 0x00249249; - - for(j=0;j<4;j++) { - a = (d >> (6*j+0)) & 0x7; - b = (d >> (6*j+3)) & 0x7; - r->coeffs[4*i+j] = a - b; - } - } -} -#endif - -void poly_cbd_eta1(poly *r, const uint8_t buf[KYBER_ETA1*KYBER_N/4]) -{ -#if KYBER_ETA1 == 2 - cbd2(r, buf); -#elif KYBER_ETA1 == 3 - cbd3(r, buf); -#else -#error "This implementation requires eta1 in {2,3}" -#endif -} - -void poly_cbd_eta2(poly *r, const uint8_t buf[KYBER_ETA2*KYBER_N/4]) -{ -#if KYBER_ETA2 == 2 - cbd2(r, buf); -#else -#error "This implementation requires eta2 = 2" -#endif -} diff --git a/code/jasmin/avx2v/cbd.h b/code/jasmin/avx2v/cbd.h deleted file mode 100644 index b057c161..00000000 --- a/code/jasmin/avx2v/cbd.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef CBD_H -#define CBD_H - -#include "poly.h" - -void poly_cbd_eta1(poly *r, const uint8_t buf[KYBER_ETA1*KYBER_N/4]); -void poly_cbd_eta2(poly *r, const uint8_t buf[KYBER_ETA2*KYBER_N/4]); - -#endif diff --git a/code/jasmin/avx2v/compile.bench b/code/jasmin/avx2v/compile.bench deleted file mode 100644 index 28cd4c56..00000000 --- a/code/jasmin/avx2v/compile.bench +++ /dev/null @@ -1,147 +0,0 @@ -===================================================== -===== Benchmark with flag -until_typing -===================================================== - -real 0m0.067s -user 0m0.046s -sys 0m0.015s -===================================================== -===== Benchmark with flag -until_cstexp -===================================================== - -real 0m0.133s -user 0m0.096s -sys 0m0.029s -===================================================== -===== Benchmark with flag -until_inline -===================================================== - -real 0m0.312s -user 0m0.285s -sys 0m0.021s -===================================================== -===== Benchmark with flag -until_rmfunc -===================================================== - -real 0m0.368s -user 0m0.322s -sys 0m0.038s -===================================================== -===== Benchmark with flag -until_unroll -===================================================== - -real 0m0.461s -user 0m0.427s -sys 0m0.025s -===================================================== -===== Benchmark with flag -until_splitting -===================================================== - -real 0m0.615s -user 0m0.579s -sys 0m0.029s -===================================================== -===== Benchmark with flag -until_valloc -===================================================== - -real 0m0.035s -user 0m0.016s -sys 0m0.013s -===================================================== -===== Benchmark with flag -until_vallocd -===================================================== - -real 0m0.034s -user 0m0.015s -sys 0m0.013s -===================================================== -===== Benchmark with flag -until_vshare -===================================================== - -real 0m0.034s -user 0m0.015s -sys 0m0.013s -===================================================== -===== Benchmark with flag -until_vshared -===================================================== - -real 0m0.034s -user 0m0.015s -sys 0m0.013s -===================================================== -===== Benchmark with flag -until_arrexp -===================================================== - -real 0m1.352s -user 0m1.303s -sys 0m0.039s -===================================================== -===== Benchmark with flag -until_rmarrinit -===================================================== - -real 0m1.143s -user 0m1.091s -sys 0m0.040s -===================================================== -===== Benchmark with flag -until_rmglobals -===================================================== - -real 0m2.055s -user 0m2.000s -sys 0m0.042s -===================================================== -===== Benchmark with flag -until_arrexp -===================================================== - -real 0m1.416s -user 0m1.353s -sys 0m0.051s -===================================================== -===== Benchmark with flag -until_makeref -===================================================== - -real 0m1.244s -user 0m1.195s -sys 0m0.039s -===================================================== -===== Benchmark with flag -until_lowering -===================================================== - -real 0m2.203s -user 0m2.149s -sys 0m0.045s -===================================================== -===== Benchmark with flag -until_stkalloc -===================================================== - -real 0m3.250s -user 0m3.190s -sys 0m0.049s -===================================================== -===== Benchmark with flag -until_ralloc -===================================================== - -real 0m3.870s -user 0m3.806s -sys 0m0.050s -===================================================== -===== Benchmark with flag -until_rallocd -===================================================== - -real 0m4.021s -user 0m3.955s -sys 0m0.051s -===================================================== -===== Benchmark with flag -until_linear -===================================================== - -real 0m4.059s -user 0m3.993s -sys 0m0.053s -===================================================== -===== Benchmark with flag -until_asm -===================================================== - -real 0m4.428s -user 0m4.259s -sys 0m0.159s diff --git a/code/jasmin/avx2v/compile.bench.old b/code/jasmin/avx2v/compile.bench.old deleted file mode 100644 index e46e66ee..00000000 --- a/code/jasmin/avx2v/compile.bench.old +++ /dev/null @@ -1,147 +0,0 @@ -===================================================== -===== Benchmark with flag -until_typing -===================================================== - -real 0m0.026s -user 0m0.023s -sys 0m0.004s -===================================================== -===== Benchmark with flag -until_cstexp -===================================================== - -real 0m0.027s -user 0m0.024s -sys 0m0.003s -===================================================== -===== Benchmark with flag -until_inline -===================================================== - -real 0m0.128s -user 0m0.119s -sys 0m0.009s -===================================================== -===== Benchmark with flag -until_rmfunc -===================================================== - -real 0m0.128s -user 0m0.124s -sys 0m0.004s -===================================================== -===== Benchmark with flag -until_unroll -===================================================== - -real 0m0.813s -user 0m0.789s -sys 0m0.024s -===================================================== -===== Benchmark with flag -until_splitting -===================================================== - -real 0m1.017s -user 0m1.012s -sys 0m0.004s -===================================================== -===== Benchmark with flag -until_valloc -===================================================== - -real 0m2.145s -user 0m2.116s -sys 0m0.029s -===================================================== -===== Benchmark with flag -until_vallocd -===================================================== - -real 0m3.375s -user 0m3.322s -sys 0m0.032s -===================================================== -===== Benchmark with flag -until_vshare -===================================================== - -real 0m6.072s -user 0m6.005s -sys 0m0.067s -===================================================== -===== Benchmark with flag -until_vshared -===================================================== - -real 0m9.594s -user 0m9.554s -sys 0m0.039s -===================================================== -===== Benchmark with flag -until_arrexp -===================================================== - -real 0m10.981s -user 0m10.943s -sys 0m0.036s -===================================================== -===== Benchmark with flag -until_rmarrinit -===================================================== - -real 0m9.608s -user 0m9.564s -sys 0m0.043s -===================================================== -===== Benchmark with flag -until_rmglobals -===================================================== - -real 0m11.234s -user 0m11.184s -sys 0m0.050s -===================================================== -===== Benchmark with flag -until_arrexp -===================================================== - -real 0m10.989s -user 0m10.908s -sys 0m0.052s -===================================================== -===== Benchmark with flag -until_makeref -===================================================== - -real 0m11.783s -user 0m11.750s -sys 0m0.032s -===================================================== -===== Benchmark with flag -until_lowering -===================================================== - -real 0m12.629s -user 0m12.561s -sys 0m0.068s -===================================================== -===== Benchmark with flag -until_stkalloc -===================================================== - -real 2m27.958s -user 2m27.867s -sys 0m0.088s -===================================================== -===== Benchmark with flag -until_ralloc -===================================================== - -real 4m43.603s -user 4m43.537s -sys 0m0.057s -===================================================== -===== Benchmark with flag -until_rallocd -===================================================== - -real 4m39.180s -user 4m39.085s -sys 0m0.095s -===================================================== -===== Benchmark with flag -until_linear -===================================================== - -real 4m43.906s -user 4m43.843s -sys 0m0.063s -===================================================== -===== Benchmark with flag -until_asm -===================================================== - -real 4m51.571s -user 4m51.416s -sys 0m0.156s diff --git a/code/jasmin/avx2v/consts.c b/code/jasmin/avx2v/consts.c deleted file mode 100644 index 7999477d..00000000 --- a/code/jasmin/avx2v/consts.c +++ /dev/null @@ -1,153 +0,0 @@ -#include -#include "params.h" -#include "consts.h" - -#define Q KYBER_Q -#define MONT ((1U << 16) % Q) -#define QINV 62209 // q^-1 mod 2^16 -#define V (((1U << 26) + Q/2)/Q) -#define FHI (MONT*(MONT*(Q-1)*((Q-1)/128) % Q) % Q) -#define FLO (FHI*QINV % 65536) -#define MONTSQHI (MONT*MONT % Q) -#define MONTSQLO (MONTSQHI*QINV % 65536) -#define MASK 4095 - -const uint16_t qdata[928] __attribute__((aligned(32))) = { -#define _16XQ 0 - Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, - -#define _16XQINV 16 - QINV, QINV, QINV, QINV, QINV, QINV, QINV, QINV, - QINV, QINV, QINV, QINV, QINV, QINV, QINV, QINV, - -#define _16XV 32 - V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, - -#define _16XFLO 48 - FLO, FLO, FLO, FLO, FLO, FLO, FLO, FLO, - FLO, FLO, FLO, FLO, FLO, FLO, FLO, FLO, - -#define _16XFHI 64 - FHI, FHI, FHI, FHI, FHI, FHI, FHI, FHI, - FHI, FHI, FHI, FHI, FHI, FHI, FHI, FHI, - -#define _16XMONTSQLO 80 - MONTSQLO, MONTSQLO, MONTSQLO, MONTSQLO, - MONTSQLO, MONTSQLO, MONTSQLO, MONTSQLO, - MONTSQLO, MONTSQLO, MONTSQLO, MONTSQLO, - MONTSQLO, MONTSQLO, MONTSQLO, MONTSQLO, - -#define _16XMONTSQHI 96 - MONTSQHI, MONTSQHI, MONTSQHI, MONTSQHI, - MONTSQHI, MONTSQHI, MONTSQHI, MONTSQHI, - MONTSQHI, MONTSQHI, MONTSQHI, MONTSQHI, - MONTSQHI, MONTSQHI, MONTSQHI, MONTSQHI, - -#define _16XMASK 112 - MASK, MASK, MASK, MASK, MASK, MASK, MASK, MASK, - MASK, MASK, MASK, MASK, MASK, MASK, MASK, MASK, - -#define _ZETAS_EXP 128 - 31499, 31499, 2571, 2571, 14746, 14746, 2970, 2970, - 13525, 13525, 13525, 13525, 13525, 13525, 13525, 13525, - 53134, 53134, 53134, 53134, 53134, 53134, 53134, 53134, - 1493, 1493, 1493, 1493, 1493, 1493, 1493, 1493, - 1422, 1422, 1422, 1422, 1422, 1422, 1422, 1422, - 44630, 44630, 44630, 44630, 27758, 27758, 27758, 27758, - 61737, 61737, 61737, 61737, 49846, 49846, 49846, 49846, - 3158, 3158, 3158, 3158, 622, 622, 622, 622, - 1577, 1577, 1577, 1577, 182, 182, 182, 182, - 59709, 59709, 17364, 17364, 39176, 39176, 36479, 36479, - 5572, 5572, 64434, 64434, 21439, 21439, 39295, 39295, - 573, 573, 2004, 2004, 264, 264, 383, 383, - 2500, 2500, 1458, 1458, 1727, 1727, 3199, 3199, - 59847, 59020, 1497, 30967, 41972, 20179, 20711, 25081, - 52740, 26617, 16065, 53095, 9135, 64887, 39550, 27837, - 1223, 652, 2777, 1015, 2036, 1491, 3047, 1785, - 516, 3321, 3009, 2663, 1711, 2167, 126, 1469, - 65202, 54059, 33310, 20494, 37798, 945, 50654, 6182, - 32011, 10631, 29176, 36775, 47051, 17561, 51106, 60261, - 2226, 555, 2078, 1550, 422, 177, 3038, 1574, - 3083, 1159, 2552, 2727, 1739, 2457, 418, 3173, - 11182, 13387, 51303, 43881, 13131, 60950, 23093, 5493, - 33034, 30318, 46795, 12639, 20100, 18525, 19529, 52918, - 430, 843, 871, 105, 587, 3094, 2869, 1653, - 778, 3182, 1483, 1119, 644, 349, 329, 3254, - 788, 788, 1812, 1812, 28191, 28191, 28191, 28191, - 28191, 28191, 28191, 28191, 48842, 48842, 48842, 48842, - 48842, 48842, 48842, 48842, 287, 287, 287, 287, - 287, 287, 287, 287, 202, 202, 202, 202, - 202, 202, 202, 202, 10690, 10690, 10690, 10690, - 1359, 1359, 1359, 1359, 54335, 54335, 54335, 54335, - 31164, 31164, 31164, 31164, 962, 962, 962, 962, - 2127, 2127, 2127, 2127, 1855, 1855, 1855, 1855, - 1468, 1468, 1468, 1468, 37464, 37464, 24313, 24313, - 55004, 55004, 8800, 8800, 18427, 18427, 8859, 8859, - 26676, 26676, 49374, 49374, 2648, 2648, 1017, 1017, - 732, 732, 608, 608, 1787, 1787, 411, 411, - 3124, 3124, 1758, 1758, 19884, 37287, 49650, 56638, - 37227, 9076, 35338, 18250, 13427, 14017, 36381, 52780, - 16832, 4312, 41381, 47622, 2476, 3239, 3058, 830, - 107, 1908, 3082, 2378, 2931, 961, 1821, 2604, - 448, 2264, 677, 2054, 34353, 25435, 58154, 24392, - 44610, 10946, 24215, 16990, 10336, 57603, 43035, 10907, - 31637, 28644, 23998, 48114, 817, 603, 1322, 1864, - 2114, 1218, 2455, 2142, 2144, 2051, 1819, 2459, - 3221, 996, 958, 1522, 20297, 2146, 15356, 33152, - 59257, 50634, 54492, 14470, 44039, 45338, 23211, 48094, - 41677, 45279, 7757, 23132, 1097, 610, 2044, 384, - 3193, 1994, 220, 1670, 1799, 794, 2475, 478, - 3021, 991, 1869, 1628, 0, 0, 0, 0, - -#define _ZETAS_INV_EXP 528 - 42405, 57780, 20258, 23860, 17443, 42326, 20199, 21498, - 51067, 11045, 14903, 6280, 32385, 50181, 63391, 45240, - 1701, 1460, 2338, 308, 2851, 854, 2535, 1530, - 1659, 3109, 1335, 136, 2945, 1285, 2719, 2232, - 17423, 41539, 36893, 33900, 54630, 22502, 7934, 55201, - 48547, 41322, 54591, 20927, 41145, 7383, 40102, 31184, - 1807, 2371, 2333, 108, 870, 1510, 1278, 1185, - 1187, 874, 2111, 1215, 1465, 2007, 2726, 2512, - 17915, 24156, 61225, 48705, 12757, 29156, 51520, 52110, - 47287, 30199, 56461, 28310, 8899, 15887, 28250, 45653, - 1275, 2652, 1065, 2881, 725, 1508, 2368, 398, - 951, 247, 1421, 3222, 2499, 271, 90, 853, - 16163, 16163, 38861, 38861, 56678, 56678, 47110, 47110, - 56737, 56737, 10533, 10533, 41224, 41224, 28073, 28073, - 1571, 1571, 205, 205, 2918, 2918, 1542, 1542, - 2721, 2721, 2597, 2597, 2312, 2312, 681, 681, - 34373, 34373, 34373, 34373, 11202, 11202, 11202, 11202, - 64178, 64178, 64178, 64178, 54847, 54847, 54847, 54847, - 1861, 1861, 1861, 1861, 1474, 1474, 1474, 1474, - 1202, 1202, 1202, 1202, 2367, 2367, 2367, 2367, - 16695, 16695, 16695, 16695, 16695, 16695, 16695, 16695, - 37346, 37346, 37346, 37346, 37346, 37346, 37346, 37346, - 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, - 3042, 3042, 3042, 3042, 3042, 3042, 3042, 3042, - 64749, 64749, 1517, 1517, 12619, 46008, 47012, 45437, - 52898, 18742, 35219, 32503, 60044, 42444, 4587, 52406, - 21656, 14234, 52150, 54355, 75, 3000, 2980, 2685, - 2210, 1846, 147, 2551, 1676, 460, 235, 2742, - 3224, 2458, 2486, 2899, 5276, 14431, 47976, 18486, - 28762, 36361, 54906, 33526, 59355, 14883, 64592, 27739, - 45043, 32227, 11478, 335, 156, 2911, 872, 1590, - 602, 777, 2170, 246, 1755, 291, 3152, 2907, - 1779, 1251, 2774, 1103, 37700, 25987, 650, 56402, - 12442, 49472, 38920, 12797, 40456, 44826, 45358, 23565, - 34570, 64040, 6517, 5690, 1860, 3203, 1162, 1618, - 666, 320, 8, 2813, 1544, 282, 1838, 1293, - 2314, 552, 2677, 2106, 26242, 26242, 44098, 44098, - 1103, 1103, 59965, 59965, 29058, 29058, 26361, 26361, - 48173, 48173, 5828, 5828, 130, 130, 1602, 1602, - 1871, 1871, 829, 829, 2946, 2946, 3065, 3065, - 1325, 1325, 2756, 2756, 15691, 15691, 15691, 15691, - 3800, 3800, 3800, 3800, 37779, 37779, 37779, 37779, - 20907, 20907, 20907, 20907, 3147, 3147, 3147, 3147, - 1752, 1752, 1752, 1752, 2707, 2707, 2707, 2707, - 171, 171, 171, 171, 12403, 12403, 12403, 12403, - 12403, 12403, 12403, 12403, 52012, 52012, 52012, 52012, - 52012, 52012, 52012, 52012, 1907, 1907, 1907, 1907, - 1907, 1907, 1907, 1907, 1836, 1836, 1836, 1836, - 1836, 1836, 1836, 1836, 50791, 50791, 359, 359, - 60300, 60300, 1932, 1932, 0, 0, 0, 0 -}; diff --git a/code/jasmin/avx2v/consts.h b/code/jasmin/avx2v/consts.h deleted file mode 100644 index 93edff96..00000000 --- a/code/jasmin/avx2v/consts.h +++ /dev/null @@ -1,40 +0,0 @@ -#ifndef CONSTS_H -#define CONSTS_H - -#include "params.h" - -#define _16XQ 0 -#define _16XQINV 16 -#define _16XV 32 -#define _16XFLO 48 -#define _16XFHI 64 -#define _16XMONTSQLO 80 -#define _16XMONTSQHI 96 -#define _16XMASK 112 -#define _ZETAS_EXP 128 -#define _ZETAS_INV_EXP 528 - -/* The C ABI on MacOS exports all symbols with a leading - * underscore. This means that any symbols we refer to from - * C files (functions) can't be found, and all symbols we - * refer to from ASM also can't be found. - * - * This define helps us get around this - */ -#ifdef __ASSEMBLER__ -#if defined(__WIN32__) || defined(__APPLE__) -#define decorate(s) _##s -#define cdecl2(s) decorate(s) -#define cdecl(s) cdecl2(KYBER_NAMESPACE(##s)) -#else -#define cdecl(s) KYBER_NAMESPACE(##s) -#endif -#endif - -#ifndef __ASSEMBLER__ -#include -#define qdata KYBER_NAMESPACE(qdata) -extern const uint16_t qdata[]; -#endif - -#endif diff --git a/code/jasmin/avx2v/consts.jinc b/code/jasmin/avx2v/consts.jinc deleted file mode 100644 index ddefdb70..00000000 --- a/code/jasmin/avx2v/consts.jinc +++ /dev/null @@ -1,104 +0,0 @@ -u16[128] jzetas = {2285, 2571, 2970, 1812, 1493, 1422, 287, 202, 3158, 622, 1577, 182, 962, 2127, 1855, 1468, - 573, 2004, 264, 383, 2500, 1458, 1727, 3199, 2648, 1017, 732, 608, 1787, 411, 3124, 1758, - 1223, 652, 2777, 1015, 2036, 1491, 3047, 1785, 516, 3321, 3009, 2663, 1711, 2167, 126, 1469, - 2476, 3239, 3058, 830, 107, 1908, 3082, 2378, 2931, 961, 1821, 2604, 448, 2264, 677, 2054, - 2226, 430, 555, 843, 2078, 871, 1550, 105, 422, 587, 177, 3094, 3038, 2869, 1574, 1653, - 3083, 778, 1159, 3182, 2552, 1483, 2727, 1119, 1739, 644, 2457, 349, 418, 329, 3173, 3254, - 817, 1097, 603, 610, 1322, 2044, 1864, 384, 2114, 3193, 1218, 1994, 2455, 220, 2142, 1670, - 2144, 1799, 2051, 794, 1819, 2475, 2459, 478, 3221, 3021, 996, 991, 958, 1869, 1522, 1628}; - - -u16[128] jzetas_inv = {1701, 1807, 1460, 2371, 2338, 2333, 308, 108, 2851, 870, 854, 1510, 2535, 1278, 1530, 1185, - 1659, 1187, 3109, 874, 1335, 2111, 136, 1215, 2945, 1465, 1285, 2007, 2719, 2726, 2232, 2512, - 75, 156, 3000, 2911, 2980, 872, 2685, 1590, 2210, 602, 1846, 777, 147, 2170, 2551, 246, - 1676, 1755, 460, 291, 235, 3152, 2742, 2907, 3224, 1779, 2458, 1251, 2486, 2774, 2899, 1103, - 1275, 2652, 1065, 2881, 725, 1508, 2368, 398, 951, 247, 1421, 3222, 2499, 271, 90, 853, - 1860, 3203, 1162, 1618, 666, 320, 8, 2813, 1544, 282, 1838, 1293, 2314, 552, 2677, 2106, - 1571, 205, 2918, 1542, 2721, 2597, 2312, 681, 130, 1602, 1871, 829, 2946, 3065, 1325, 2756, - 1861, 1474, 1202, 2367, 3147, 1752, 2707, 171, 3127, 3042, 1907, 1836, 1517, 359, 758, 1441}; - -u16[400] jzetas_exp = {31499, 31499, 2571, 2571, 14746, 14746, 2970, 2970, 13525, 13525, 13525, 13525, 13525, 13525, 13525, 13525, - 53134, 53134, 53134, 53134, 53134, 53134, 53134, 53134, 1493, 1493, 1493, 1493, 1493, 1493, 1493, 1493, - 1422, 1422, 1422, 1422, 1422, 1422, 1422, 1422, 44630, 44630, 44630, 44630, 27758, 27758, 27758, 27758, - 61737, 61737, 61737, 61737, 49846, 49846, 49846, 49846, 3158, 3158, 3158, 3158, 622, 622, 622, 622, - 1577, 1577, 1577, 1577, 182, 182, 182, 182, 59709, 59709, 17364, 17364, 39176, 39176, 36479, 36479, - 5572, 5572, 64434, 64434, 21439, 21439, 39295, 39295, 573, 573, 2004, 2004, 264, 264, 383, 383, - 2500, 2500, 1458, 1458, 1727, 1727, 3199, 3199, 59847, 59020, 1497, 30967, 41972, 20179, 20711, 25081, - 52740, 26617, 16065, 53095, 9135, 64887, 39550, 27837, 1223, 652, 2777, 1015, 2036, 1491, 3047, 1785, - 516, 3321, 3009, 2663, 1711, 2167, 126, 1469, 65202, 54059, 33310, 20494, 37798, 945, 50654, 6182, - 32011, 10631, 29176, 36775, 47051, 17561, 51106, 60261, 2226, 555, 2078, 1550, 422, 177, 3038, 1574, - 3083, 1159, 2552, 2727, 1739, 2457, 418, 3173, 11182, 13387, 51303, 43881, 13131, 60950, 23093, 5493, - 33034, 30318, 46795, 12639, 20100, 18525, 19529, 52918, 430, 843, 871, 105, 587, 3094, 2869, 1653, - 778, 3182, 1483, 1119, 644, 349, 329, 3254, 788, 788, 1812, 1812, 28191, 28191, 28191, 28191, - 28191, 28191, 28191, 28191, 48842, 48842, 48842, 48842, 48842, 48842, 48842, 48842, 287, 287, 287, 287, - 287, 287, 287, 287, 202, 202, 202, 202, 202, 202, 202, 202, 10690, 10690, 10690, 10690, - 1359, 1359, 1359, 1359, 54335, 54335, 54335, 54335, 31164, 31164, 31164, 31164, 962, 962, 962, 962, - 2127, 2127, 2127, 2127, 1855, 1855, 1855, 1855, 1468, 1468, 1468, 1468, 37464, 37464, 24313, 24313, - 55004, 55004, 8800, 8800, 18427, 18427, 8859, 8859, 26676, 26676, 49374, 49374, 2648, 2648, 1017, 1017, - 732, 732, 608, 608, 1787, 1787, 411, 411, 3124, 3124, 1758, 1758, 19884, 37287, 49650, 56638, - 37227, 9076, 35338, 18250, 13427, 14017, 36381, 52780, 16832, 4312, 41381, 47622, 2476, 3239, 3058, 830, - 107, 1908, 3082, 2378, 2931, 961, 1821, 2604, 448, 2264, 677, 2054, 34353, 25435, 58154, 24392, - 44610, 10946, 24215, 16990, 10336, 57603, 43035, 10907, 31637, 28644, 23998, 48114, 817, 603, 1322, 1864, - 2114, 1218, 2455, 2142, 2144, 2051, 1819, 2459, 3221, 996, 958, 1522, 20297, 2146, 15356, 33152, - 59257, 50634, 54492, 14470, 44039, 45338, 23211, 48094, 41677, 45279, 7757, 23132, 1097, 610, 2044, 384, - 3193, 1994, 220, 1670, 1799, 794, 2475, 478, 3021, 991, 1869, 1628, 0, 0, 0, 0}; - -u16[400] jzetas_inv_exp = {42405, 57780, 20258, 23860, 17443, 42326, 20199, 21498, 51067, 11045, 14903, 6280, 32385, 50181, 63391, 45240, - 1701, 1460, 2338, 308, 2851, 854, 2535, 1530, 1659, 3109, 1335, 136, 2945, 1285, 2719, 2232, - 17423, 41539, 36893, 33900, 54630, 22502, 7934, 55201, 48547, 41322, 54591, 20927, 41145, 7383, 40102, 31184, - 1807, 2371, 2333, 108, 870, 1510, 1278, 1185, 1187, 874, 2111, 1215, 1465, 2007, 2726, 2512, - 17915, 24156, 61225, 48705, 12757, 29156, 51520, 52110, 47287, 30199, 56461, 28310, 8899, 15887, 28250, 45653, - 1275, 2652, 1065, 2881, 725, 1508, 2368, 398, 951, 247, 1421, 3222, 2499, 271, 90, 853, - 16163, 16163, 38861, 38861, 56678, 56678, 47110, 47110, 56737, 56737, 10533, 10533, 41224, 41224, 28073, 28073, - 1571, 1571, 205, 205, 2918, 2918, 1542, 1542, 2721, 2721, 2597, 2597, 2312, 2312, 681, 681, - 34373, 34373, 34373, 34373, 11202, 11202, 11202, 11202, 64178, 64178, 64178, 64178, 54847, 54847, 54847, 54847, - 1861, 1861, 1861, 1861, 1474, 1474, 1474, 1474, 1202, 1202, 1202, 1202, 2367, 2367, 2367, 2367, - 16695, 16695, 16695, 16695, 16695, 16695, 16695, 16695, 37346, 37346, 37346, 37346, 37346, 37346, 37346, 37346, - 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3042, 3042, 3042, 3042, 3042, 3042, 3042, 3042, - 64749, 64749, 1517, 1517, 12619, 46008, 47012, 45437, 52898, 18742, 35219, 32503, 60044, 42444, 4587, 52406, - 21656, 14234, 52150, 54355, 75, 3000, 2980, 2685, 2210, 1846, 147, 2551, 1676, 460, 235, 2742, - 3224, 2458, 2486, 2899, 5276, 14431, 47976, 18486, 28762, 36361, 54906, 33526, 59355, 14883, 64592, 27739, - 45043, 32227, 11478, 335, 156, 2911, 872, 1590, 602, 777, 2170, 246, 1755, 291, 3152, 2907, - 1779, 1251, 2774, 1103, 37700, 25987, 650, 56402, 12442, 49472, 38920, 12797, 40456, 44826, 45358, 23565, - 34570, 64040, 6517, 5690, 1860, 3203, 1162, 1618, 666, 320, 8, 2813, 1544, 282, 1838, 1293, - 2314, 552, 2677, 2106, 26242, 26242, 44098, 44098, 1103, 1103, 59965, 59965, 29058, 29058, 26361, 26361, - 48173, 48173, 5828, 5828, 130, 130, 1602, 1602, 1871, 1871, 829, 829, 2946, 2946, 3065, 3065, - 1325, 1325, 2756, 2756, 15691, 15691, 15691, 15691, 3800, 3800, 3800, 3800, 37779, 37779, 37779, 37779, - 20907, 20907, 20907, 20907, 3147, 3147, 3147, 3147, 1752, 1752, 1752, 1752, 2707, 2707, 2707, 2707, - 171, 171, 171, 171, 12403, 12403, 12403, 12403, 12403, 12403, 12403, 12403, 52012, 52012, 52012, 52012, - 52012, 52012, 52012, 52012, 1907, 1907, 1907, 1907, 1907, 1907, 1907, 1907, 1836, 1836, 1836, 1836, - 1836, 1836, 1836, 1836, 50791, 50791, 359, 359, 60300, 60300, 1932, 1932, 0, 0, 0, 0 -}; - -u16[16] jqx16 = {KYBER_Q, KYBER_Q, KYBER_Q, KYBER_Q, KYBER_Q, KYBER_Q, KYBER_Q, KYBER_Q, - KYBER_Q, KYBER_Q, KYBER_Q, KYBER_Q, KYBER_Q, KYBER_Q, KYBER_Q, KYBER_Q}; - -u16[16] jqinvx16 = {62209, 62209, 62209, 62209, 62209, 62209, 62209, 62209, - 62209, 62209, 62209, 62209, 62209, 62209, 62209, 62209}; - -u16[16] jvx16 = {20159, 20159, 20159, 20159, 20159, 20159, 20159, 20159, - 20159, 20159, 20159, 20159, 20159, 20159, 20159, 20159}; - -u16[16] jfhix16 = {1441, 1441, 1441, 1441, 1441, 1441, 1441, 1441, - 1441, 1441, 1441, 1441, 1441, 1441, 1441, 1441}; - -u16[16] jflox16 = {55457, 55457, 55457, 55457, 55457, 55457, 55457, 55457, - 55457, 55457, 55457, 55457, 55457, 55457, 55457, 55457}; - -u16[16] maskx16 = {4095, 4095, 4095, 4095, 4095, 4095, 4095, 4095, - 4095, 4095, 4095, 4095, 4095, 4095, 4095, 4095}; - -u16[16] hqx16_p1 = {1665, 1665, 1665, 1665, 1665, 1665, 1665, 1665, - 1665, 1665, 1665, 1665, 1665, 1665, 1665, 1665}; - -u16[16] hqx16_m1 = {1664, 1664, 1664, 1664, 1664, 1664, 1664, 1664, - 1664, 1664, 1664, 1664, 1664, 1664, 1664, 1664}; - -u16[16] hhqx16 = {832, 832, 832, 832, 832, 832, 832, 832, - 832, 832, 832, 832, 832, 832, 832, 832}; - -u16[16] mqinvx16 = {80635, 80635, 80635, 80635, 80635, 80635, 80635, 80635, - 80635, 80635, 80635, 80635, 80635, 80635, 80635, 80635}; - -u16[16] jdmontx16 = {1353, 1353, 1353, 1353, 1353, 1353, 1353, 1353, - 1353, 1353, 1353, 1353, 1353, 1353, 1353, 1353}; diff --git a/code/jasmin/avx2v/extraction/Array1088.ec b/code/jasmin/avx2v/extraction/Array1088.ec deleted file mode 100644 index de2a1ea4..00000000 --- a/code/jasmin/avx2v/extraction/Array1088.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JArray. - -clone export PolyArray as Array1088 with op size <- 1088. diff --git a/code/jasmin/avx2v/extraction/Array128.ec b/code/jasmin/avx2v/extraction/Array128.ec deleted file mode 100644 index e5880272..00000000 --- a/code/jasmin/avx2v/extraction/Array128.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JArray. - -clone export PolyArray as Array128 with op size <- 128. diff --git a/code/jasmin/avx2v/extraction/Array136.ec b/code/jasmin/avx2v/extraction/Array136.ec deleted file mode 100644 index d73cf0b1..00000000 --- a/code/jasmin/avx2v/extraction/Array136.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JArray. - -clone export PolyArray as Array136 with op size <- 136. diff --git a/code/jasmin/avx2v/extraction/Array16.ec b/code/jasmin/avx2v/extraction/Array16.ec deleted file mode 100644 index 429639d4..00000000 --- a/code/jasmin/avx2v/extraction/Array16.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JArray. - -clone export PolyArray as Array16 with op size <- 16. diff --git a/code/jasmin/avx2v/extraction/Array168.ec b/code/jasmin/avx2v/extraction/Array168.ec deleted file mode 100644 index 6abfbe44..00000000 --- a/code/jasmin/avx2v/extraction/Array168.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JArray. - -clone export PolyArray as Array168 with op size <- 168. diff --git a/code/jasmin/avx2v/extraction/Array2304.ec b/code/jasmin/avx2v/extraction/Array2304.ec deleted file mode 100644 index f0038311..00000000 --- a/code/jasmin/avx2v/extraction/Array2304.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JArray. - -clone export PolyArray as Array2304 with op size <- 2304. diff --git a/code/jasmin/avx2v/extraction/Array24.ec b/code/jasmin/avx2v/extraction/Array24.ec deleted file mode 100644 index 8982b77c..00000000 --- a/code/jasmin/avx2v/extraction/Array24.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JArray. - -clone export PolyArray as Array24 with op size <- 24. diff --git a/code/jasmin/avx2v/extraction/Array25.ec b/code/jasmin/avx2v/extraction/Array25.ec deleted file mode 100644 index 30bcb172..00000000 --- a/code/jasmin/avx2v/extraction/Array25.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JArray. - -clone export PolyArray as Array25 with op size <- 25. diff --git a/code/jasmin/avx2v/extraction/Array256.ec b/code/jasmin/avx2v/extraction/Array256.ec deleted file mode 100644 index 6f03a141..00000000 --- a/code/jasmin/avx2v/extraction/Array256.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JArray. - -clone export PolyArray as Array256 with op size <- 256. diff --git a/code/jasmin/avx2v/extraction/Array32.ec b/code/jasmin/avx2v/extraction/Array32.ec deleted file mode 100644 index c72b94f2..00000000 --- a/code/jasmin/avx2v/extraction/Array32.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JArray. - -clone export PolyArray as Array32 with op size <- 32. diff --git a/code/jasmin/avx2v/extraction/Array33.ec b/code/jasmin/avx2v/extraction/Array33.ec deleted file mode 100644 index c60f0144..00000000 --- a/code/jasmin/avx2v/extraction/Array33.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JArray. - -clone export PolyArray as Array33 with op size <- 33. diff --git a/code/jasmin/avx2v/extraction/Array34.ec b/code/jasmin/avx2v/extraction/Array34.ec deleted file mode 100644 index d6bb77b1..00000000 --- a/code/jasmin/avx2v/extraction/Array34.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JArray. - -clone export PolyArray as Array34 with op size <- 34. diff --git a/code/jasmin/avx2v/extraction/Array4.ec b/code/jasmin/avx2v/extraction/Array4.ec deleted file mode 100644 index bc0e12ed..00000000 --- a/code/jasmin/avx2v/extraction/Array4.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JArray. - -clone export PolyArray as Array4 with op size <- 4. diff --git a/code/jasmin/avx2v/extraction/Array400.ec b/code/jasmin/avx2v/extraction/Array400.ec deleted file mode 100644 index 3e9031a1..00000000 --- a/code/jasmin/avx2v/extraction/Array400.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JArray. - -clone export PolyArray as Array400 with op size <- 400. diff --git a/code/jasmin/avx2v/extraction/Array5.ec b/code/jasmin/avx2v/extraction/Array5.ec deleted file mode 100644 index 8dc7b36e..00000000 --- a/code/jasmin/avx2v/extraction/Array5.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JArray. - -clone export PolyArray as Array5 with op size <- 5. diff --git a/code/jasmin/avx2v/extraction/Array64.ec b/code/jasmin/avx2v/extraction/Array64.ec deleted file mode 100644 index 3ccc4576..00000000 --- a/code/jasmin/avx2v/extraction/Array64.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JArray. - -clone export PolyArray as Array64 with op size <- 64. diff --git a/code/jasmin/avx2v/extraction/Array768.ec b/code/jasmin/avx2v/extraction/Array768.ec deleted file mode 100644 index 241538a0..00000000 --- a/code/jasmin/avx2v/extraction/Array768.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JArray. - -clone export PolyArray as Array768 with op size <- 768. diff --git a/code/jasmin/avx2v/extraction/Array8.ec b/code/jasmin/avx2v/extraction/Array8.ec deleted file mode 100644 index 0f8b9ee8..00000000 --- a/code/jasmin/avx2v/extraction/Array8.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JArray. - -clone export PolyArray as Array8 with op size <- 8. diff --git a/code/jasmin/avx2v/extraction/Array960.ec b/code/jasmin/avx2v/extraction/Array960.ec deleted file mode 100644 index bb0f324a..00000000 --- a/code/jasmin/avx2v/extraction/Array960.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JArray. - -clone export PolyArray as Array960 with op size <- 960. diff --git a/code/jasmin/avx2v/extraction/Makefile b/code/jasmin/avx2v/extraction/Makefile deleted file mode 100644 index 640317a1..00000000 --- a/code/jasmin/avx2v/extraction/Makefile +++ /dev/null @@ -1,19 +0,0 @@ -# -*- Makefile -*- - -# -------------------------------------------------------------------- --include ../../../Makefile.conf - -# -------------------------------------------------------------------- -.PHONY: all ec clean - -# -------------------------------------------------------------------- -all: ec - -ec: - $(JASMINC) ../jkem.jazz -oec jkem_avx2.ec \ - -ec jade_kem_kyber_kyber768_amd64_avx2v_keypair \ - -ec jade_kem_kyber_kyber768_amd64_avx2v_enc \ - -ec jade_kem_kyber_kyber768_amd64_avx2v_dec - -clean: - rm -f *.ec diff --git a/code/jasmin/avx2v/extraction/WArray1088.ec b/code/jasmin/avx2v/extraction/WArray1088.ec deleted file mode 100644 index 811cd399..00000000 --- a/code/jasmin/avx2v/extraction/WArray1088.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JWord_array. - -clone export WArray as WArray1088 with op size <- 1088. diff --git a/code/jasmin/avx2v/extraction/WArray128.ec b/code/jasmin/avx2v/extraction/WArray128.ec deleted file mode 100644 index 3c9d6893..00000000 --- a/code/jasmin/avx2v/extraction/WArray128.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JWord_array. - -clone export WArray as WArray128 with op size <- 128. diff --git a/code/jasmin/avx2v/extraction/WArray136.ec b/code/jasmin/avx2v/extraction/WArray136.ec deleted file mode 100644 index 6fa8f20e..00000000 --- a/code/jasmin/avx2v/extraction/WArray136.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JWord_array. - -clone export WArray as WArray136 with op size <- 136. diff --git a/code/jasmin/avx2v/extraction/WArray1536.ec b/code/jasmin/avx2v/extraction/WArray1536.ec deleted file mode 100644 index 83524e52..00000000 --- a/code/jasmin/avx2v/extraction/WArray1536.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JWord_array. - -clone export WArray as WArray1536 with op size <- 1536. diff --git a/code/jasmin/avx2v/extraction/WArray16.ec b/code/jasmin/avx2v/extraction/WArray16.ec deleted file mode 100644 index f2ed50c2..00000000 --- a/code/jasmin/avx2v/extraction/WArray16.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JWord_array. - -clone export WArray as WArray16 with op size <- 16. diff --git a/code/jasmin/avx2v/extraction/WArray168.ec b/code/jasmin/avx2v/extraction/WArray168.ec deleted file mode 100644 index 7292dff0..00000000 --- a/code/jasmin/avx2v/extraction/WArray168.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JWord_array. - -clone export WArray as WArray168 with op size <- 168. diff --git a/code/jasmin/avx2v/extraction/WArray192.ec b/code/jasmin/avx2v/extraction/WArray192.ec deleted file mode 100644 index c8564c54..00000000 --- a/code/jasmin/avx2v/extraction/WArray192.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JWord_array. - -clone export WArray as WArray192 with op size <- 192. diff --git a/code/jasmin/avx2v/extraction/WArray200.ec b/code/jasmin/avx2v/extraction/WArray200.ec deleted file mode 100644 index 99b887c8..00000000 --- a/code/jasmin/avx2v/extraction/WArray200.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JWord_array. - -clone export WArray as WArray200 with op size <- 200. diff --git a/code/jasmin/avx2v/extraction/WArray256.ec b/code/jasmin/avx2v/extraction/WArray256.ec deleted file mode 100644 index b07b1c22..00000000 --- a/code/jasmin/avx2v/extraction/WArray256.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JWord_array. - -clone export WArray as WArray256 with op size <- 256. diff --git a/code/jasmin/avx2v/extraction/WArray32.ec b/code/jasmin/avx2v/extraction/WArray32.ec deleted file mode 100644 index b828f8d3..00000000 --- a/code/jasmin/avx2v/extraction/WArray32.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JWord_array. - -clone export WArray as WArray32 with op size <- 32. diff --git a/code/jasmin/avx2v/extraction/WArray33.ec b/code/jasmin/avx2v/extraction/WArray33.ec deleted file mode 100644 index 1e8a9d93..00000000 --- a/code/jasmin/avx2v/extraction/WArray33.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JWord_array. - -clone export WArray as WArray33 with op size <- 33. diff --git a/code/jasmin/avx2v/extraction/WArray34.ec b/code/jasmin/avx2v/extraction/WArray34.ec deleted file mode 100644 index 1f331a9b..00000000 --- a/code/jasmin/avx2v/extraction/WArray34.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JWord_array. - -clone export WArray as WArray34 with op size <- 34. diff --git a/code/jasmin/avx2v/extraction/WArray40.ec b/code/jasmin/avx2v/extraction/WArray40.ec deleted file mode 100644 index 003b6e23..00000000 --- a/code/jasmin/avx2v/extraction/WArray40.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JWord_array. - -clone export WArray as WArray40 with op size <- 40. diff --git a/code/jasmin/avx2v/extraction/WArray4608.ec b/code/jasmin/avx2v/extraction/WArray4608.ec deleted file mode 100644 index e32c47df..00000000 --- a/code/jasmin/avx2v/extraction/WArray4608.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JWord_array. - -clone export WArray as WArray4608 with op size <- 4608. diff --git a/code/jasmin/avx2v/extraction/WArray512.ec b/code/jasmin/avx2v/extraction/WArray512.ec deleted file mode 100644 index a690df87..00000000 --- a/code/jasmin/avx2v/extraction/WArray512.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JWord_array. - -clone export WArray as WArray512 with op size <- 512. diff --git a/code/jasmin/avx2v/extraction/WArray64.ec b/code/jasmin/avx2v/extraction/WArray64.ec deleted file mode 100644 index 6f4aeb6b..00000000 --- a/code/jasmin/avx2v/extraction/WArray64.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JWord_array. - -clone export WArray as WArray64 with op size <- 64. diff --git a/code/jasmin/avx2v/extraction/WArray768.ec b/code/jasmin/avx2v/extraction/WArray768.ec deleted file mode 100644 index 8852789a..00000000 --- a/code/jasmin/avx2v/extraction/WArray768.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JWord_array. - -clone export WArray as WArray768 with op size <- 768. diff --git a/code/jasmin/avx2v/extraction/WArray800.ec b/code/jasmin/avx2v/extraction/WArray800.ec deleted file mode 100644 index 2cf23ef6..00000000 --- a/code/jasmin/avx2v/extraction/WArray800.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JWord_array. - -clone export WArray as WArray800 with op size <- 800. diff --git a/code/jasmin/avx2v/extraction/WArray960.ec b/code/jasmin/avx2v/extraction/WArray960.ec deleted file mode 100644 index c2f56fe9..00000000 --- a/code/jasmin/avx2v/extraction/WArray960.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JWord_array. - -clone export WArray as WArray960 with op size <- 960. diff --git a/code/jasmin/avx2v/extraction/jkem_avx2.ec b/code/jasmin/avx2v/extraction/jkem_avx2.ec deleted file mode 100644 index 1504daa1..00000000 --- a/code/jasmin/avx2v/extraction/jkem_avx2.ec +++ /dev/null @@ -1,5263 +0,0 @@ -require import AllCore IntDiv CoreMap List Distr. -from Jasmin require import JModel_x86. -import SLH64. - - -require import Array4 Array5 Array8 Array16 Array24 Array25 Array32 Array33 - Array34 Array64 Array128 Array136 Array168 Array256 Array400 - Array768 Array960 Array1088 Array2304. -require import WArray16 WArray32 WArray33 WArray34 WArray40 WArray64 - WArray128 WArray136 WArray168 WArray192 WArray200 WArray256 - WArray512 WArray768 WArray800 WArray960 WArray1088 WArray1536 - WArray4608. - -abbrev pvc_shufbidx_s = Array32.of_list witness [W8.of_int 0; W8.of_int 1; -W8.of_int 2; W8.of_int 3; W8.of_int 4; W8.of_int 8; W8.of_int 9; -W8.of_int 10; W8.of_int 11; W8.of_int 12; W8.of_int (-1); W8.of_int (-1); -W8.of_int (-1); W8.of_int (-1); W8.of_int (-1); W8.of_int (-1); W8.of_int 9; -W8.of_int 10; W8.of_int 11; W8.of_int 12; W8.of_int (-1); W8.of_int (-1); -W8.of_int (-1); W8.of_int (-1); W8.of_int (-1); W8.of_int (-1); W8.of_int 0; -W8.of_int 1; W8.of_int 2; W8.of_int 3; W8.of_int 4; W8.of_int 8]. - - -abbrev pvc_sllvdidx_s = W64.of_int 12. - - -abbrev pvc_shift2_s = W64.of_int 288230380513787905. - - -abbrev pvc_mask_s = W16.of_int 1023. - - -abbrev pvc_shift1_s = W16.of_int 4096. - - -abbrev pvc_off_s = W16.of_int 15. - - -abbrev pvd_mask_s = W32.of_int 2145394680. - - -abbrev pvd_sllvdidx_s = W64.of_int 4. - - -abbrev pvd_shufbdidx_s = Array32.of_list witness [W8.of_int 0; W8.of_int 1; -W8.of_int 1; W8.of_int 2; W8.of_int 2; W8.of_int 3; W8.of_int 3; W8.of_int 4; -W8.of_int 5; W8.of_int 6; W8.of_int 6; W8.of_int 7; W8.of_int 7; W8.of_int 8; -W8.of_int 8; W8.of_int 9; W8.of_int 2; W8.of_int 3; W8.of_int 3; W8.of_int 4; -W8.of_int 4; W8.of_int 5; W8.of_int 5; W8.of_int 6; W8.of_int 7; W8.of_int 8; -W8.of_int 8; W8.of_int 9; W8.of_int 9; W8.of_int 10; W8.of_int 10; -W8.of_int 11]. - - -abbrev pvd_q_s = W32.of_int 218182660. - - -abbrev cbd_jshufbidx = Array32.of_list witness [W8.of_int 0; W8.of_int 1; -W8.of_int 2; W8.of_int (-1); W8.of_int 3; W8.of_int 4; W8.of_int 5; -W8.of_int (-1); W8.of_int 6; W8.of_int 7; W8.of_int 8; W8.of_int (-1); -W8.of_int 9; W8.of_int 10; W8.of_int 11; W8.of_int (-1); W8.of_int 4; -W8.of_int 5; W8.of_int 6; W8.of_int (-1); W8.of_int 7; W8.of_int 8; -W8.of_int 9; W8.of_int (-1); W8.of_int 10; W8.of_int 11; W8.of_int 12; -W8.of_int (-1); W8.of_int 13; W8.of_int 14; W8.of_int 15; W8.of_int (-1)]. - - -abbrev pfm_idx_s = Array16.of_list witness [W8.of_int 0; W8.of_int 1; -W8.of_int 4; W8.of_int 5; W8.of_int 8; W8.of_int 9; W8.of_int 12; -W8.of_int 13; W8.of_int 2; W8.of_int 3; W8.of_int 6; W8.of_int 7; -W8.of_int 10; W8.of_int 11; W8.of_int 14; W8.of_int 15]. - - -abbrev pfm_shift_s = Array4.of_list witness [W32.of_int 3; W32.of_int 2; -W32.of_int 1; W32.of_int 0]. - - -abbrev pd_shift_s = W32.of_int 8390656. - - -abbrev pd_mask_s = W32.of_int 15728655. - - -abbrev pd_jshufbidx = Array32.of_list witness [W8.of_int 0; W8.of_int 0; -W8.of_int 0; W8.of_int 0; W8.of_int 1; W8.of_int 1; W8.of_int 1; W8.of_int 1; -W8.of_int 2; W8.of_int 2; W8.of_int 2; W8.of_int 2; W8.of_int 3; W8.of_int 3; -W8.of_int 3; W8.of_int 3; W8.of_int 4; W8.of_int 4; W8.of_int 4; W8.of_int 4; -W8.of_int 5; W8.of_int 5; W8.of_int 5; W8.of_int 5; W8.of_int 6; W8.of_int 6; -W8.of_int 6; W8.of_int 6; W8.of_int 7; W8.of_int 7; W8.of_int 7; -W8.of_int 7]. - - -abbrev pc_permidx_s = Array8.of_list witness [W32.of_int 0; W32.of_int 4; -W32.of_int 1; W32.of_int 5; W32.of_int 2; W32.of_int 6; W32.of_int 3; -W32.of_int 7]. - - -abbrev pc_shift2_s = W16.of_int 4097. - - -abbrev pc_mask_s = W16.of_int 15. - - -abbrev pc_shift1_s = W16.of_int 512. - - -abbrev KeccakF1600RoundConstants = Array24.of_list witness [W256.of_int 6277101735386680764176071790128604879584176795969512275969; -W256.of_int 206504092890751023779864409751650843328560248233805014854828162; -W256.of_int (-57896044618657891154337237002533387566728630465883811983015055433200855646070); -W256.of_int (-57896044605177918687001956587831074660851270707671256656745893357814858874880); -W256.of_int 206560586806369503906741994397762000772476505824968740465311883; -W256.of_int 13479973339852421633450939126351338586088633588469736715148203130881; -W256.of_int (-57896044605177917877255832722949256082138009781081227190387086677747775274879); -W256.of_int (-57896044618657891964083360867415206145441891392473841449373862113267939246071); -W256.of_int 866240039483361945456297907037747473382616397843792694083722; -W256.of_int 853685836012588583927945763457490263623448044251853669531784; -W256.of_int 13480179078138900667299665761280331841242166839448401411882560290825; -W256.of_int 13479973396346337251931066003935984697246077504727327878873813614602; -W256.of_int 13480179894162126267568165104169664557960801185391384887919156166795; -W256.of_int (-57896044618658096836129800417901987324072977609879901317736128966209602322293); -W256.of_int (-57896044618657891160614338737920068330904702256012416862599232229170367922039); -W256.of_int (-57896044618657892001745971279735290730498322133245470726878922889085012901885); -W256.of_int (-57896044618657892008023073015121971494674393923374075606463099685054525177854); -W256.of_int (-57896044618658096905177919507155475730009767301294554993162073721874237357952); -W256.of_int 205750840682504622088163281136835410743010147018288673381711882; -W256.of_int (-57896044605178124312300604384719547540610971740509902075209375727097995067382); -W256.of_int (-57896044605177917877255832722949256082138009781081227190387086677747775274879); -W256.of_int (-57896044618657891217108254356400195208489348367169860778856823392895978405760); -W256.of_int 13479973339852421633450939126351338586088633588469736715148203130881; -W256.of_int (-57896044605177918636785142704737628547442696386642417620072478990058760667128)]. - - -abbrev rho8 = W256.of_int 13620818001941277694121380808605999856886653716761013959207994299728839901191. - - -abbrev rho56 = W256.of_int 10910488462195273559651782724632284871561478246514020268633800075540923875841. - - -abbrev shake_sep = Array4.of_list witness [W64.of_int (-9223372036854775808); -W64.of_int (-9223372036854775808); W64.of_int (-9223372036854775808); -W64.of_int (-9223372036854775808)]. - - -abbrev KECCAK_RC = Array24.of_list witness [W64.of_int 1; W64.of_int 32898; -W64.of_int (-9223372036854742902); W64.of_int (-9223372034707259392); -W64.of_int 32907; W64.of_int 2147483649; W64.of_int (-9223372034707259263); -W64.of_int (-9223372036854743031); W64.of_int 138; W64.of_int 136; -W64.of_int 2147516425; W64.of_int 2147483658; W64.of_int 2147516555; -W64.of_int (-9223372036854775669); W64.of_int (-9223372036854742903); -W64.of_int (-9223372036854743037); W64.of_int (-9223372036854743038); -W64.of_int (-9223372036854775680); W64.of_int 32778; -W64.of_int (-9223372034707292150); W64.of_int (-9223372034707259263); -W64.of_int (-9223372036854742912); W64.of_int 2147483649; -W64.of_int (-9223372034707259384)]. - - -abbrev jdmontx16 = Array16.of_list witness [W16.of_int 1353; W16.of_int 1353; -W16.of_int 1353; W16.of_int 1353; W16.of_int 1353; W16.of_int 1353; -W16.of_int 1353; W16.of_int 1353; W16.of_int 1353; W16.of_int 1353; -W16.of_int 1353; W16.of_int 1353; W16.of_int 1353; W16.of_int 1353; -W16.of_int 1353; W16.of_int 1353]. - - -abbrev mqinvx16 = Array16.of_list witness [W16.of_int 15099; -W16.of_int 15099; W16.of_int 15099; W16.of_int 15099; W16.of_int 15099; -W16.of_int 15099; W16.of_int 15099; W16.of_int 15099; W16.of_int 15099; -W16.of_int 15099; W16.of_int 15099; W16.of_int 15099; W16.of_int 15099; -W16.of_int 15099; W16.of_int 15099; W16.of_int 15099]. - - -abbrev hhqx16 = Array16.of_list witness [W16.of_int 832; W16.of_int 832; -W16.of_int 832; W16.of_int 832; W16.of_int 832; W16.of_int 832; -W16.of_int 832; W16.of_int 832; W16.of_int 832; W16.of_int 832; -W16.of_int 832; W16.of_int 832; W16.of_int 832; W16.of_int 832; -W16.of_int 832; W16.of_int 832]. - - -abbrev hqx16_m1 = Array16.of_list witness [W16.of_int 1664; W16.of_int 1664; -W16.of_int 1664; W16.of_int 1664; W16.of_int 1664; W16.of_int 1664; -W16.of_int 1664; W16.of_int 1664; W16.of_int 1664; W16.of_int 1664; -W16.of_int 1664; W16.of_int 1664; W16.of_int 1664; W16.of_int 1664; -W16.of_int 1664; W16.of_int 1664]. - - -abbrev hqx16_p1 = Array16.of_list witness [W16.of_int 1665; W16.of_int 1665; -W16.of_int 1665; W16.of_int 1665; W16.of_int 1665; W16.of_int 1665; -W16.of_int 1665; W16.of_int 1665; W16.of_int 1665; W16.of_int 1665; -W16.of_int 1665; W16.of_int 1665; W16.of_int 1665; W16.of_int 1665; -W16.of_int 1665; W16.of_int 1665]. - - -abbrev maskx16 = Array16.of_list witness [W16.of_int 4095; W16.of_int 4095; -W16.of_int 4095; W16.of_int 4095; W16.of_int 4095; W16.of_int 4095; -W16.of_int 4095; W16.of_int 4095; W16.of_int 4095; W16.of_int 4095; -W16.of_int 4095; W16.of_int 4095; W16.of_int 4095; W16.of_int 4095; -W16.of_int 4095; W16.of_int 4095]. - - -abbrev jflox16 = Array16.of_list witness [W16.of_int (-10079); -W16.of_int (-10079); W16.of_int (-10079); W16.of_int (-10079); -W16.of_int (-10079); W16.of_int (-10079); W16.of_int (-10079); -W16.of_int (-10079); W16.of_int (-10079); W16.of_int (-10079); -W16.of_int (-10079); W16.of_int (-10079); W16.of_int (-10079); -W16.of_int (-10079); W16.of_int (-10079); W16.of_int (-10079)]. - - -abbrev jfhix16 = Array16.of_list witness [W16.of_int 1441; W16.of_int 1441; -W16.of_int 1441; W16.of_int 1441; W16.of_int 1441; W16.of_int 1441; -W16.of_int 1441; W16.of_int 1441; W16.of_int 1441; W16.of_int 1441; -W16.of_int 1441; W16.of_int 1441; W16.of_int 1441; W16.of_int 1441; -W16.of_int 1441; W16.of_int 1441]. - - -abbrev jvx16 = Array16.of_list witness [W16.of_int 20159; W16.of_int 20159; -W16.of_int 20159; W16.of_int 20159; W16.of_int 20159; W16.of_int 20159; -W16.of_int 20159; W16.of_int 20159; W16.of_int 20159; W16.of_int 20159; -W16.of_int 20159; W16.of_int 20159; W16.of_int 20159; W16.of_int 20159; -W16.of_int 20159; W16.of_int 20159]. - - -abbrev jqinvx16 = Array16.of_list witness [W16.of_int (-3327); -W16.of_int (-3327); W16.of_int (-3327); W16.of_int (-3327); -W16.of_int (-3327); W16.of_int (-3327); W16.of_int (-3327); -W16.of_int (-3327); W16.of_int (-3327); W16.of_int (-3327); -W16.of_int (-3327); W16.of_int (-3327); W16.of_int (-3327); -W16.of_int (-3327); W16.of_int (-3327); W16.of_int (-3327)]. - - -abbrev jqx16 = Array16.of_list witness [W16.of_int 3329; W16.of_int 3329; -W16.of_int 3329; W16.of_int 3329; W16.of_int 3329; W16.of_int 3329; -W16.of_int 3329; W16.of_int 3329; W16.of_int 3329; W16.of_int 3329; -W16.of_int 3329; W16.of_int 3329; W16.of_int 3329; W16.of_int 3329; -W16.of_int 3329; W16.of_int 3329]. - - -abbrev jzetas_inv_exp = Array400.of_list witness [W16.of_int (-23131); -W16.of_int (-7756); W16.of_int 20258; W16.of_int 23860; W16.of_int 17443; -W16.of_int (-23210); W16.of_int 20199; W16.of_int 21498; W16.of_int (-14469); -W16.of_int 11045; W16.of_int 14903; W16.of_int 6280; W16.of_int 32385; -W16.of_int (-15355); W16.of_int (-2145); W16.of_int (-20296); -W16.of_int 1701; W16.of_int 1460; W16.of_int 2338; W16.of_int 308; -W16.of_int 2851; W16.of_int 854; W16.of_int 2535; W16.of_int 1530; -W16.of_int 1659; W16.of_int 3109; W16.of_int 1335; W16.of_int 136; -W16.of_int 2945; W16.of_int 1285; W16.of_int 2719; W16.of_int 2232; -W16.of_int 17423; W16.of_int (-23997); W16.of_int (-28643); -W16.of_int (-31636); W16.of_int (-10906); W16.of_int 22502; W16.of_int 7934; -W16.of_int (-10335); W16.of_int (-16989); W16.of_int (-24214); -W16.of_int (-10945); W16.of_int 20927; W16.of_int (-24391); W16.of_int 7383; -W16.of_int (-25434); W16.of_int 31184; W16.of_int 1807; W16.of_int 2371; -W16.of_int 2333; W16.of_int 108; W16.of_int 870; W16.of_int 1510; -W16.of_int 1278; W16.of_int 1185; W16.of_int 1187; W16.of_int 874; -W16.of_int 2111; W16.of_int 1215; W16.of_int 1465; W16.of_int 2007; -W16.of_int 2726; W16.of_int 2512; W16.of_int 17915; W16.of_int 24156; -W16.of_int (-4311); W16.of_int (-16831); W16.of_int 12757; W16.of_int 29156; -W16.of_int (-14016); W16.of_int (-13426); W16.of_int (-18249); -W16.of_int 30199; W16.of_int (-9075); W16.of_int 28310; W16.of_int 8899; -W16.of_int 15887; W16.of_int 28250; W16.of_int (-19883); W16.of_int 1275; -W16.of_int 2652; W16.of_int 1065; W16.of_int 2881; W16.of_int 725; -W16.of_int 1508; W16.of_int 2368; W16.of_int 398; W16.of_int 951; -W16.of_int 247; W16.of_int 1421; W16.of_int 3222; W16.of_int 2499; -W16.of_int 271; W16.of_int 90; W16.of_int 853; W16.of_int 16163; -W16.of_int 16163; W16.of_int (-26675); W16.of_int (-26675); -W16.of_int (-8858); W16.of_int (-8858); W16.of_int (-18426); -W16.of_int (-18426); W16.of_int (-8799); W16.of_int (-8799); -W16.of_int 10533; W16.of_int 10533; W16.of_int (-24312); W16.of_int (-24312); -W16.of_int 28073; W16.of_int 28073; W16.of_int 1571; W16.of_int 1571; -W16.of_int 205; W16.of_int 205; W16.of_int 2918; W16.of_int 2918; -W16.of_int 1542; W16.of_int 1542; W16.of_int 2721; W16.of_int 2721; -W16.of_int 2597; W16.of_int 2597; W16.of_int 2312; W16.of_int 2312; -W16.of_int 681; W16.of_int 681; W16.of_int (-31163); W16.of_int (-31163); -W16.of_int (-31163); W16.of_int (-31163); W16.of_int 11202; W16.of_int 11202; -W16.of_int 11202; W16.of_int 11202; W16.of_int (-1358); W16.of_int (-1358); -W16.of_int (-1358); W16.of_int (-1358); W16.of_int (-10689); -W16.of_int (-10689); W16.of_int (-10689); W16.of_int (-10689); -W16.of_int 1861; W16.of_int 1861; W16.of_int 1861; W16.of_int 1861; -W16.of_int 1474; W16.of_int 1474; W16.of_int 1474; W16.of_int 1474; -W16.of_int 1202; W16.of_int 1202; W16.of_int 1202; W16.of_int 1202; -W16.of_int 2367; W16.of_int 2367; W16.of_int 2367; W16.of_int 2367; -W16.of_int 16695; W16.of_int 16695; W16.of_int 16695; W16.of_int 16695; -W16.of_int 16695; W16.of_int 16695; W16.of_int 16695; W16.of_int 16695; -W16.of_int (-28190); W16.of_int (-28190); W16.of_int (-28190); -W16.of_int (-28190); W16.of_int (-28190); W16.of_int (-28190); -W16.of_int (-28190); W16.of_int (-28190); W16.of_int 3127; W16.of_int 3127; -W16.of_int 3127; W16.of_int 3127; W16.of_int 3127; W16.of_int 3127; -W16.of_int 3127; W16.of_int 3127; W16.of_int 3042; W16.of_int 3042; -W16.of_int 3042; W16.of_int 3042; W16.of_int 3042; W16.of_int 3042; -W16.of_int 3042; W16.of_int 3042; W16.of_int (-787); W16.of_int (-787); -W16.of_int 1517; W16.of_int 1517; W16.of_int 12619; W16.of_int (-19528); -W16.of_int (-18524); W16.of_int (-20099); W16.of_int (-12638); -W16.of_int 18742; W16.of_int (-30317); W16.of_int 32503; W16.of_int (-5492); -W16.of_int (-23092); W16.of_int 4587; W16.of_int (-13130); W16.of_int 21656; -W16.of_int 14234; W16.of_int (-13386); W16.of_int (-11181); W16.of_int 75; -W16.of_int 3000; W16.of_int 2980; W16.of_int 2685; W16.of_int 2210; -W16.of_int 1846; W16.of_int 147; W16.of_int 2551; W16.of_int 1676; -W16.of_int 460; W16.of_int 235; W16.of_int 2742; W16.of_int 3224; -W16.of_int 2458; W16.of_int 2486; W16.of_int 2899; W16.of_int 5276; -W16.of_int 14431; W16.of_int (-17560); W16.of_int 18486; W16.of_int 28762; -W16.of_int (-29175); W16.of_int (-10630); W16.of_int (-32010); -W16.of_int (-6181); W16.of_int 14883; W16.of_int (-944); W16.of_int 27739; -W16.of_int (-20493); W16.of_int 32227; W16.of_int 11478; W16.of_int 335; -W16.of_int 156; W16.of_int 2911; W16.of_int 872; W16.of_int 1590; -W16.of_int 602; W16.of_int 777; W16.of_int 2170; W16.of_int 246; -W16.of_int 1755; W16.of_int 291; W16.of_int 3152; W16.of_int 2907; -W16.of_int 1779; W16.of_int 1251; W16.of_int 2774; W16.of_int 1103; -W16.of_int (-27836); W16.of_int 25987; W16.of_int 650; W16.of_int (-9134); -W16.of_int 12442; W16.of_int (-16064); W16.of_int (-26616); W16.of_int 12797; -W16.of_int (-25080); W16.of_int (-20710); W16.of_int (-20178); -W16.of_int 23565; W16.of_int (-30966); W16.of_int (-1496); W16.of_int 6517; -W16.of_int 5690; W16.of_int 1860; W16.of_int 3203; W16.of_int 1162; -W16.of_int 1618; W16.of_int 666; W16.of_int 320; W16.of_int 8; -W16.of_int 2813; W16.of_int 1544; W16.of_int 282; W16.of_int 1838; -W16.of_int 1293; W16.of_int 2314; W16.of_int 552; W16.of_int 2677; -W16.of_int 2106; W16.of_int 26242; W16.of_int 26242; W16.of_int (-21438); -W16.of_int (-21438); W16.of_int 1103; W16.of_int 1103; W16.of_int (-5571); -W16.of_int (-5571); W16.of_int 29058; W16.of_int 29058; W16.of_int 26361; -W16.of_int 26361; W16.of_int (-17363); W16.of_int (-17363); W16.of_int 5828; -W16.of_int 5828; W16.of_int 130; W16.of_int 130; W16.of_int 1602; -W16.of_int 1602; W16.of_int 1871; W16.of_int 1871; W16.of_int 829; -W16.of_int 829; W16.of_int 2946; W16.of_int 2946; W16.of_int 3065; -W16.of_int 3065; W16.of_int 1325; W16.of_int 1325; W16.of_int 2756; -W16.of_int 2756; W16.of_int 15691; W16.of_int 15691; W16.of_int 15691; -W16.of_int 15691; W16.of_int 3800; W16.of_int 3800; W16.of_int 3800; -W16.of_int 3800; W16.of_int (-27757); W16.of_int (-27757); -W16.of_int (-27757); W16.of_int (-27757); W16.of_int 20907; W16.of_int 20907; -W16.of_int 20907; W16.of_int 20907; W16.of_int 3147; W16.of_int 3147; -W16.of_int 3147; W16.of_int 3147; W16.of_int 1752; W16.of_int 1752; -W16.of_int 1752; W16.of_int 1752; W16.of_int 2707; W16.of_int 2707; -W16.of_int 2707; W16.of_int 2707; W16.of_int 171; W16.of_int 171; -W16.of_int 171; W16.of_int 171; W16.of_int 12403; W16.of_int 12403; -W16.of_int 12403; W16.of_int 12403; W16.of_int 12403; W16.of_int 12403; -W16.of_int 12403; W16.of_int 12403; W16.of_int (-13524); W16.of_int (-13524); -W16.of_int (-13524); W16.of_int (-13524); W16.of_int (-13524); -W16.of_int (-13524); W16.of_int (-13524); W16.of_int (-13524); -W16.of_int 1907; W16.of_int 1907; W16.of_int 1907; W16.of_int 1907; -W16.of_int 1907; W16.of_int 1907; W16.of_int 1907; W16.of_int 1907; -W16.of_int 1836; W16.of_int 1836; W16.of_int 1836; W16.of_int 1836; -W16.of_int 1836; W16.of_int 1836; W16.of_int 1836; W16.of_int 1836; -W16.of_int (-14745); W16.of_int (-14745); W16.of_int 359; W16.of_int 359; -W16.of_int (-5236); W16.of_int (-5236); W16.of_int 1932; W16.of_int 1932; -W16.of_int 0; W16.of_int 0; W16.of_int 0; W16.of_int 0]. - - -abbrev jzetas_exp = Array400.of_list witness [W16.of_int 31499; -W16.of_int 31499; W16.of_int 2571; W16.of_int 2571; W16.of_int 14746; -W16.of_int 14746; W16.of_int 2970; W16.of_int 2970; W16.of_int 13525; -W16.of_int 13525; W16.of_int 13525; W16.of_int 13525; W16.of_int 13525; -W16.of_int 13525; W16.of_int 13525; W16.of_int 13525; W16.of_int (-12402); -W16.of_int (-12402); W16.of_int (-12402); W16.of_int (-12402); -W16.of_int (-12402); W16.of_int (-12402); W16.of_int (-12402); -W16.of_int (-12402); W16.of_int 1493; W16.of_int 1493; W16.of_int 1493; -W16.of_int 1493; W16.of_int 1493; W16.of_int 1493; W16.of_int 1493; -W16.of_int 1493; W16.of_int 1422; W16.of_int 1422; W16.of_int 1422; -W16.of_int 1422; W16.of_int 1422; W16.of_int 1422; W16.of_int 1422; -W16.of_int 1422; W16.of_int (-20906); W16.of_int (-20906); -W16.of_int (-20906); W16.of_int (-20906); W16.of_int 27758; W16.of_int 27758; -W16.of_int 27758; W16.of_int 27758; W16.of_int (-3799); W16.of_int (-3799); -W16.of_int (-3799); W16.of_int (-3799); W16.of_int (-15690); -W16.of_int (-15690); W16.of_int (-15690); W16.of_int (-15690); -W16.of_int 3158; W16.of_int 3158; W16.of_int 3158; W16.of_int 3158; -W16.of_int 622; W16.of_int 622; W16.of_int 622; W16.of_int 622; -W16.of_int 1577; W16.of_int 1577; W16.of_int 1577; W16.of_int 1577; -W16.of_int 182; W16.of_int 182; W16.of_int 182; W16.of_int 182; -W16.of_int (-5827); W16.of_int (-5827); W16.of_int 17364; W16.of_int 17364; -W16.of_int (-26360); W16.of_int (-26360); W16.of_int (-29057); -W16.of_int (-29057); W16.of_int 5572; W16.of_int 5572; W16.of_int (-1102); -W16.of_int (-1102); W16.of_int 21439; W16.of_int 21439; W16.of_int (-26241); -W16.of_int (-26241); W16.of_int 573; W16.of_int 573; W16.of_int 2004; -W16.of_int 2004; W16.of_int 264; W16.of_int 264; W16.of_int 383; -W16.of_int 383; W16.of_int 2500; W16.of_int 2500; W16.of_int 1458; -W16.of_int 1458; W16.of_int 1727; W16.of_int 1727; W16.of_int 3199; -W16.of_int 3199; W16.of_int (-5689); W16.of_int (-6516); W16.of_int 1497; -W16.of_int 30967; W16.of_int (-23564); W16.of_int 20179; W16.of_int 20711; -W16.of_int 25081; W16.of_int (-12796); W16.of_int 26617; W16.of_int 16065; -W16.of_int (-12441); W16.of_int 9135; W16.of_int (-649); W16.of_int (-25986); -W16.of_int 27837; W16.of_int 1223; W16.of_int 652; W16.of_int 2777; -W16.of_int 1015; W16.of_int 2036; W16.of_int 1491; W16.of_int 3047; -W16.of_int 1785; W16.of_int 516; W16.of_int 3321; W16.of_int 3009; -W16.of_int 2663; W16.of_int 1711; W16.of_int 2167; W16.of_int 126; -W16.of_int 1469; W16.of_int (-334); W16.of_int (-11477); W16.of_int (-32226); -W16.of_int 20494; W16.of_int (-27738); W16.of_int 945; W16.of_int (-14882); -W16.of_int 6182; W16.of_int 32011; W16.of_int 10631; W16.of_int 29176; -W16.of_int (-28761); W16.of_int (-18485); W16.of_int 17561; -W16.of_int (-14430); W16.of_int (-5275); W16.of_int 2226; W16.of_int 555; -W16.of_int 2078; W16.of_int 1550; W16.of_int 422; W16.of_int 177; -W16.of_int 3038; W16.of_int 1574; W16.of_int 3083; W16.of_int 1159; -W16.of_int 2552; W16.of_int 2727; W16.of_int 1739; W16.of_int 2457; -W16.of_int 418; W16.of_int 3173; W16.of_int 11182; W16.of_int 13387; -W16.of_int (-14233); W16.of_int (-21655); W16.of_int 13131; -W16.of_int (-4586); W16.of_int 23093; W16.of_int 5493; W16.of_int (-32502); -W16.of_int 30318; W16.of_int (-18741); W16.of_int 12639; W16.of_int 20100; -W16.of_int 18525; W16.of_int 19529; W16.of_int (-12618); W16.of_int 430; -W16.of_int 843; W16.of_int 871; W16.of_int 105; W16.of_int 587; -W16.of_int 3094; W16.of_int 2869; W16.of_int 1653; W16.of_int 778; -W16.of_int 3182; W16.of_int 1483; W16.of_int 1119; W16.of_int 644; -W16.of_int 349; W16.of_int 329; W16.of_int 3254; W16.of_int 788; -W16.of_int 788; W16.of_int 1812; W16.of_int 1812; W16.of_int 28191; -W16.of_int 28191; W16.of_int 28191; W16.of_int 28191; W16.of_int 28191; -W16.of_int 28191; W16.of_int 28191; W16.of_int 28191; W16.of_int (-16694); -W16.of_int (-16694); W16.of_int (-16694); W16.of_int (-16694); -W16.of_int (-16694); W16.of_int (-16694); W16.of_int (-16694); -W16.of_int (-16694); W16.of_int 287; W16.of_int 287; W16.of_int 287; -W16.of_int 287; W16.of_int 287; W16.of_int 287; W16.of_int 287; -W16.of_int 287; W16.of_int 202; W16.of_int 202; W16.of_int 202; -W16.of_int 202; W16.of_int 202; W16.of_int 202; W16.of_int 202; -W16.of_int 202; W16.of_int 10690; W16.of_int 10690; W16.of_int 10690; -W16.of_int 10690; W16.of_int 1359; W16.of_int 1359; W16.of_int 1359; -W16.of_int 1359; W16.of_int (-11201); W16.of_int (-11201); -W16.of_int (-11201); W16.of_int (-11201); W16.of_int 31164; W16.of_int 31164; -W16.of_int 31164; W16.of_int 31164; W16.of_int 962; W16.of_int 962; -W16.of_int 962; W16.of_int 962; W16.of_int 2127; W16.of_int 2127; -W16.of_int 2127; W16.of_int 2127; W16.of_int 1855; W16.of_int 1855; -W16.of_int 1855; W16.of_int 1855; W16.of_int 1468; W16.of_int 1468; -W16.of_int 1468; W16.of_int 1468; W16.of_int (-28072); W16.of_int (-28072); -W16.of_int 24313; W16.of_int 24313; W16.of_int (-10532); W16.of_int (-10532); -W16.of_int 8800; W16.of_int 8800; W16.of_int 18427; W16.of_int 18427; -W16.of_int 8859; W16.of_int 8859; W16.of_int 26676; W16.of_int 26676; -W16.of_int (-16162); W16.of_int (-16162); W16.of_int 2648; W16.of_int 2648; -W16.of_int 1017; W16.of_int 1017; W16.of_int 732; W16.of_int 732; -W16.of_int 608; W16.of_int 608; W16.of_int 1787; W16.of_int 1787; -W16.of_int 411; W16.of_int 411; W16.of_int 3124; W16.of_int 3124; -W16.of_int 1758; W16.of_int 1758; W16.of_int 19884; W16.of_int (-28249); -W16.of_int (-15886); W16.of_int (-8898); W16.of_int (-28309); -W16.of_int 9076; W16.of_int (-30198); W16.of_int 18250; W16.of_int 13427; -W16.of_int 14017; W16.of_int (-29155); W16.of_int (-12756); W16.of_int 16832; -W16.of_int 4312; W16.of_int (-24155); W16.of_int (-17914); W16.of_int 2476; -W16.of_int 3239; W16.of_int 3058; W16.of_int 830; W16.of_int 107; -W16.of_int 1908; W16.of_int 3082; W16.of_int 2378; W16.of_int 2931; -W16.of_int 961; W16.of_int 1821; W16.of_int 2604; W16.of_int 448; -W16.of_int 2264; W16.of_int 677; W16.of_int 2054; W16.of_int (-31183); -W16.of_int 25435; W16.of_int (-7382); W16.of_int 24392; W16.of_int (-20926); -W16.of_int 10946; W16.of_int 24215; W16.of_int 16990; W16.of_int 10336; -W16.of_int (-7933); W16.of_int (-22501); W16.of_int 10907; W16.of_int 31637; -W16.of_int 28644; W16.of_int 23998; W16.of_int (-17422); W16.of_int 817; -W16.of_int 603; W16.of_int 1322; W16.of_int 1864; W16.of_int 2114; -W16.of_int 1218; W16.of_int 2455; W16.of_int 2142; W16.of_int 2144; -W16.of_int 2051; W16.of_int 1819; W16.of_int 2459; W16.of_int 3221; -W16.of_int 996; W16.of_int 958; W16.of_int 1522; W16.of_int 20297; -W16.of_int 2146; W16.of_int 15356; W16.of_int (-32384); W16.of_int (-6279); -W16.of_int (-14902); W16.of_int (-11044); W16.of_int 14470; -W16.of_int (-21497); W16.of_int (-20198); W16.of_int 23211; -W16.of_int (-17442); W16.of_int (-23859); W16.of_int (-20257); -W16.of_int 7757; W16.of_int 23132; W16.of_int 1097; W16.of_int 610; -W16.of_int 2044; W16.of_int 384; W16.of_int 3193; W16.of_int 1994; -W16.of_int 220; W16.of_int 1670; W16.of_int 1799; W16.of_int 794; -W16.of_int 2475; W16.of_int 478; W16.of_int 3021; W16.of_int 991; -W16.of_int 1869; W16.of_int 1628; W16.of_int 0; W16.of_int 0; W16.of_int 0; -W16.of_int 0]. - - -abbrev jzetas_inv = Array128.of_list witness [W16.of_int 1701; -W16.of_int 1807; W16.of_int 1460; W16.of_int 2371; W16.of_int 2338; -W16.of_int 2333; W16.of_int 308; W16.of_int 108; W16.of_int 2851; -W16.of_int 870; W16.of_int 854; W16.of_int 1510; W16.of_int 2535; -W16.of_int 1278; W16.of_int 1530; W16.of_int 1185; W16.of_int 1659; -W16.of_int 1187; W16.of_int 3109; W16.of_int 874; W16.of_int 1335; -W16.of_int 2111; W16.of_int 136; W16.of_int 1215; W16.of_int 2945; -W16.of_int 1465; W16.of_int 1285; W16.of_int 2007; W16.of_int 2719; -W16.of_int 2726; W16.of_int 2232; W16.of_int 2512; W16.of_int 75; -W16.of_int 156; W16.of_int 3000; W16.of_int 2911; W16.of_int 2980; -W16.of_int 872; W16.of_int 2685; W16.of_int 1590; W16.of_int 2210; -W16.of_int 602; W16.of_int 1846; W16.of_int 777; W16.of_int 147; -W16.of_int 2170; W16.of_int 2551; W16.of_int 246; W16.of_int 1676; -W16.of_int 1755; W16.of_int 460; W16.of_int 291; W16.of_int 235; -W16.of_int 3152; W16.of_int 2742; W16.of_int 2907; W16.of_int 3224; -W16.of_int 1779; W16.of_int 2458; W16.of_int 1251; W16.of_int 2486; -W16.of_int 2774; W16.of_int 2899; W16.of_int 1103; W16.of_int 1275; -W16.of_int 2652; W16.of_int 1065; W16.of_int 2881; W16.of_int 725; -W16.of_int 1508; W16.of_int 2368; W16.of_int 398; W16.of_int 951; -W16.of_int 247; W16.of_int 1421; W16.of_int 3222; W16.of_int 2499; -W16.of_int 271; W16.of_int 90; W16.of_int 853; W16.of_int 1860; -W16.of_int 3203; W16.of_int 1162; W16.of_int 1618; W16.of_int 666; -W16.of_int 320; W16.of_int 8; W16.of_int 2813; W16.of_int 1544; -W16.of_int 282; W16.of_int 1838; W16.of_int 1293; W16.of_int 2314; -W16.of_int 552; W16.of_int 2677; W16.of_int 2106; W16.of_int 1571; -W16.of_int 205; W16.of_int 2918; W16.of_int 1542; W16.of_int 2721; -W16.of_int 2597; W16.of_int 2312; W16.of_int 681; W16.of_int 130; -W16.of_int 1602; W16.of_int 1871; W16.of_int 829; W16.of_int 2946; -W16.of_int 3065; W16.of_int 1325; W16.of_int 2756; W16.of_int 1861; -W16.of_int 1474; W16.of_int 1202; W16.of_int 2367; W16.of_int 3147; -W16.of_int 1752; W16.of_int 2707; W16.of_int 171; W16.of_int 3127; -W16.of_int 3042; W16.of_int 1907; W16.of_int 1836; W16.of_int 1517; -W16.of_int 359; W16.of_int 758; W16.of_int 1441]. - - -abbrev jzetas = Array128.of_list witness [W16.of_int 2285; W16.of_int 2571; -W16.of_int 2970; W16.of_int 1812; W16.of_int 1493; W16.of_int 1422; -W16.of_int 287; W16.of_int 202; W16.of_int 3158; W16.of_int 622; -W16.of_int 1577; W16.of_int 182; W16.of_int 962; W16.of_int 2127; -W16.of_int 1855; W16.of_int 1468; W16.of_int 573; W16.of_int 2004; -W16.of_int 264; W16.of_int 383; W16.of_int 2500; W16.of_int 1458; -W16.of_int 1727; W16.of_int 3199; W16.of_int 2648; W16.of_int 1017; -W16.of_int 732; W16.of_int 608; W16.of_int 1787; W16.of_int 411; -W16.of_int 3124; W16.of_int 1758; W16.of_int 1223; W16.of_int 652; -W16.of_int 2777; W16.of_int 1015; W16.of_int 2036; W16.of_int 1491; -W16.of_int 3047; W16.of_int 1785; W16.of_int 516; W16.of_int 3321; -W16.of_int 3009; W16.of_int 2663; W16.of_int 1711; W16.of_int 2167; -W16.of_int 126; W16.of_int 1469; W16.of_int 2476; W16.of_int 3239; -W16.of_int 3058; W16.of_int 830; W16.of_int 107; W16.of_int 1908; -W16.of_int 3082; W16.of_int 2378; W16.of_int 2931; W16.of_int 961; -W16.of_int 1821; W16.of_int 2604; W16.of_int 448; W16.of_int 2264; -W16.of_int 677; W16.of_int 2054; W16.of_int 2226; W16.of_int 430; -W16.of_int 555; W16.of_int 843; W16.of_int 2078; W16.of_int 871; -W16.of_int 1550; W16.of_int 105; W16.of_int 422; W16.of_int 587; -W16.of_int 177; W16.of_int 3094; W16.of_int 3038; W16.of_int 2869; -W16.of_int 1574; W16.of_int 1653; W16.of_int 3083; W16.of_int 778; -W16.of_int 1159; W16.of_int 3182; W16.of_int 2552; W16.of_int 1483; -W16.of_int 2727; W16.of_int 1119; W16.of_int 1739; W16.of_int 644; -W16.of_int 2457; W16.of_int 349; W16.of_int 418; W16.of_int 329; -W16.of_int 3173; W16.of_int 3254; W16.of_int 817; W16.of_int 1097; -W16.of_int 603; W16.of_int 610; W16.of_int 1322; W16.of_int 2044; -W16.of_int 1864; W16.of_int 384; W16.of_int 2114; W16.of_int 3193; -W16.of_int 1218; W16.of_int 1994; W16.of_int 2455; W16.of_int 220; -W16.of_int 2142; W16.of_int 1670; W16.of_int 2144; W16.of_int 1799; -W16.of_int 2051; W16.of_int 794; W16.of_int 1819; W16.of_int 2475; -W16.of_int 2459; W16.of_int 478; W16.of_int 3221; W16.of_int 3021; -W16.of_int 996; W16.of_int 991; W16.of_int 958; W16.of_int 1869; -W16.of_int 1522; W16.of_int 1628]. - - -module type Syscall_t = { - proc randombytes_32(_:W8.t Array32.t) : W8.t Array32.t - proc randombytes_64(_:W8.t Array64.t) : W8.t Array64.t -}. - -module Syscall : Syscall_t = { - proc randombytes_32(a:W8.t Array32.t) : W8.t Array32.t = { - a <$ dmap WArray32.darray - (fun a => Array32.init (fun i => WArray32.get8 a i)); - return a; - } - - proc randombytes_64(a:W8.t Array64.t) : W8.t Array64.t = { - a <$ dmap WArray64.darray - (fun a => Array64.init (fun i => WArray64.get8 a i)); - return a; - } -}. - -module M(SC:Syscall_t) = { - proc __shuffle8 (a:W256.t, b:W256.t) : W256.t * W256.t = { - - var r0:W256.t; - var r1:W256.t; - - r0 <- VPERM2I128 a b (W8.of_int 32); - r1 <- VPERM2I128 a b (W8.of_int 49); - return (r0, r1); - } - - proc __shuffle4 (a:W256.t, b:W256.t) : W256.t * W256.t = { - - var r0:W256.t; - var r1:W256.t; - - r0 <- VPUNPCKL_4u64 a b; - r1 <- VPUNPCKH_4u64 a b; - return (r0, r1); - } - - proc __shuffle2 (a:W256.t, b:W256.t) : W256.t * W256.t = { - - var t0:W256.t; - var t1:W256.t; - - t0 <- VMOVSLDUP_256 b; - t0 <- VPBLEND_8u32 a t0 (W8.of_int 170); - a <- VPSRL_4u64 a (W8.of_int 32); - t1 <- VPBLEND_8u32 a b (W8.of_int 170); - return (t0, t1); - } - - proc __shuffle1 (a:W256.t, b:W256.t) : W256.t * W256.t = { - - var r0:W256.t; - var r1:W256.t; - var t0:W256.t; - var t1:W256.t; - - t0 <- VPSLL_8u32 b (W8.of_int 16); - r0 <- VPBLEND_16u16 a t0 (W8.of_int 170); - t1 <- VPSRL_8u32 a (W8.of_int 16); - r1 <- VPBLEND_16u16 t1 b (W8.of_int 170); - return (r0, r1); - } - - proc __nttunpack128 (r0:W256.t, r1:W256.t, r2:W256.t, r3:W256.t, r4:W256.t, - r5:W256.t, r6:W256.t, r7:W256.t) : W256.t * W256.t * - W256.t * W256.t * - W256.t * W256.t * - W256.t * W256.t = { - - - - (r0, r4) <@ __shuffle8 (r0, r4); - (r1, r5) <@ __shuffle8 (r1, r5); - (r2, r6) <@ __shuffle8 (r2, r6); - (r3, r7) <@ __shuffle8 (r3, r7); - (r0, r2) <@ __shuffle4 (r0, r2); - (r4, r6) <@ __shuffle4 (r4, r6); - (r1, r3) <@ __shuffle4 (r1, r3); - (r5, r7) <@ __shuffle4 (r5, r7); - (r0, r1) <@ __shuffle2 (r0, r1); - (r2, r3) <@ __shuffle2 (r2, r3); - (r4, r5) <@ __shuffle2 (r4, r5); - (r6, r7) <@ __shuffle2 (r6, r7); - (r0, r4) <@ __shuffle1 (r0, r4); - (r1, r5) <@ __shuffle1 (r1, r5); - (r2, r6) <@ __shuffle1 (r2, r6); - (r3, r7) <@ __shuffle1 (r3, r7); - return (r0, r4, r1, r5, r2, r6, r3, r7); - } - - proc _nttunpack (rp:W16.t Array256.t) : W16.t Array256.t = { - - var r0:W256.t; - var r1:W256.t; - var r2:W256.t; - var r3:W256.t; - var r4:W256.t; - var r5:W256.t; - var r6:W256.t; - var r7:W256.t; - - r0 <- (get256_direct (WArray512.init16 (fun i => (rp).[i])) (32 * 0)); - r1 <- (get256_direct (WArray512.init16 (fun i => (rp).[i])) (32 * 1)); - r2 <- (get256_direct (WArray512.init16 (fun i => (rp).[i])) (32 * 2)); - r3 <- (get256_direct (WArray512.init16 (fun i => (rp).[i])) (32 * 3)); - r4 <- (get256_direct (WArray512.init16 (fun i => (rp).[i])) (32 * 4)); - r5 <- (get256_direct (WArray512.init16 (fun i => (rp).[i])) (32 * 5)); - r6 <- (get256_direct (WArray512.init16 (fun i => (rp).[i])) (32 * 6)); - r7 <- (get256_direct (WArray512.init16 (fun i => (rp).[i])) (32 * 7)); - (r0, r1, r2, r3, r4, r5, r6, r7) <@ __nttunpack128 (r0, r1, r2, r3, r4, - r5, r6, r7); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i => (rp).[i])) (32 * 0) (r0))); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i => (rp).[i])) (32 * 1) (r1))); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i => (rp).[i])) (32 * 2) (r2))); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i => (rp).[i])) (32 * 3) (r3))); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i => (rp).[i])) (32 * 4) (r4))); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i => (rp).[i])) (32 * 5) (r5))); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i => (rp).[i])) (32 * 6) (r6))); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i => (rp).[i])) (32 * 7) (r7))); - r0 <- (get256_direct (WArray512.init16 (fun i => (rp).[i])) (32 * 8)); - r1 <- (get256_direct (WArray512.init16 (fun i => (rp).[i])) (32 * 9)); - r2 <- (get256_direct (WArray512.init16 (fun i => (rp).[i])) (32 * 10)); - r3 <- (get256_direct (WArray512.init16 (fun i => (rp).[i])) (32 * 11)); - r4 <- (get256_direct (WArray512.init16 (fun i => (rp).[i])) (32 * 12)); - r5 <- (get256_direct (WArray512.init16 (fun i => (rp).[i])) (32 * 13)); - r6 <- (get256_direct (WArray512.init16 (fun i => (rp).[i])) (32 * 14)); - r7 <- (get256_direct (WArray512.init16 (fun i => (rp).[i])) (32 * 15)); - (r0, r1, r2, r3, r4, r5, r6, r7) <@ __nttunpack128 (r0, r1, r2, r3, r4, - r5, r6, r7); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i => (rp).[i])) (32 * 8) (r0))); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i => (rp).[i])) (32 * 9) (r1))); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i => (rp).[i])) (32 * 10) (r2))); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i => (rp).[i])) (32 * 11) (r3))); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i => (rp).[i])) (32 * 12) (r4))); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i => (rp).[i])) (32 * 13) (r5))); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i => (rp).[i])) (32 * 14) (r6))); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i => (rp).[i])) (32 * 15) (r7))); - return (rp); - } - - proc __csubq (r:W256.t, qx16:W256.t) : W256.t = { - - var t:W256.t; - - r <- VPSUB_16u16 r qx16; - t <- VPSRA_16u16 r (W8.of_int 15); - t <- VPAND_256 t qx16; - r <- VPADD_16u16 t r; - return (r); - } - - proc __red16x (r:W256.t, qx16:W256.t, vx16:W256.t) : W256.t = { - - var x:W256.t; - - x <- VPMULH_16u16 r vx16; - x <- VPSRA_16u16 x (W8.of_int 10); - x <- VPMULL_16u16 x qx16; - r <- VPSUB_16u16 r x; - return (r); - } - - proc __fqmulprecomp16x (b:W256.t, al:W256.t, ah:W256.t, qx16:W256.t) : - W256.t = { - - var x:W256.t; - - x <- VPMULL_16u16 al b; - b <- VPMULH_16u16 ah b; - x <- VPMULH_16u16 x qx16; - b <- VPSUB_16u16 b x; - return (b); - } - - proc __fqmulx16 (a:W256.t, b:W256.t, qx16:W256.t, qinvx16:W256.t) : - W256.t = { - - var rd:W256.t; - var rhi:W256.t; - var rlo:W256.t; - - rhi <- VPMULH_16u16 a b; - rlo <- VPMULL_16u16 a b; - rlo <- VPMULL_16u16 rlo qinvx16; - rlo <- VPMULH_16u16 rlo qx16; - rd <- VPSUB_16u16 rhi rlo; - return (rd); - } - - proc __index (x:int, y:int) : int = { - - var r:int; - - r <- ((x %% 5) + (5 * (y %% 5))); - return (r); - } - - proc __keccak_rho_offsets (i:int) : int = { - var aux: int; - - var r:int; - var x:int; - var y:int; - var t:int; - var z:int; - - r <- 0; - x <- 1; - y <- 0; - t <- 0; - while (t < 24) { - if ((i = (x + (5 * y)))) { - r <- ((((t + 1) * (t + 2)) %/ 2) %% 64); - } else { - - } - z <- (((2 * x) + (3 * y)) %% 5); - x <- y; - y <- z; - t <- t + 1; - } - return (r); - } - - proc __rhotates (x:int, y:int) : int = { - - var r:int; - var i:int; - - i <@ __index (x, y); - r <@ __keccak_rho_offsets (i); - return (r); - } - - proc __theta_sum_scalar (a:W64.t Array25.t) : W64.t Array5.t = { - var aux: int; - - var c:W64.t Array5.t; - var i:int; - var ti:int; - var j:int; - c <- witness; - i <- 0; - while (i < 5) { - ti <@ __index (i, 0); - c.[i] <- a.[ti]; - i <- i + 1; - } - j <- 1; - while (j < 5) { - i <- 0; - while (i < 5) { - ti <@ __index (i, j); - c.[i] <- (c.[i] `^` a.[ti]); - i <- i + 1; - } - j <- j + 1; - } - return (c); - } - - proc __theta_rol_scalar (c:W64.t Array5.t) : W64.t Array5.t = { - var aux_1: bool; - var aux_0: bool; - var aux: int; - var aux_2: W64.t; - - var d:W64.t Array5.t; - var i:int; - var _0:bool; - var _1:bool; - d <- witness; - i <- 0; - while (i < 5) { - d.[i] <- c.[((i + 1) %% 5)]; - (aux_1, aux_0, aux_2) <- ROL_64 d.[i] (W8.of_int 1); - _0 <- aux_1; - _1 <- aux_0; - d.[i] <- aux_2; - d.[i] <- (d.[i] `^` c.[((i + 4) %% 5)]); - i <- i + 1; - } - return (d); - } - - proc __rol_sum_scalar (d:W64.t Array5.t, a:W64.t Array25.t, offset:int) : - W64.t Array5.t = { - var aux_1: bool; - var aux_0: bool; - var aux: int; - var aux_2: W64.t; - - var c:W64.t Array5.t; - var j:int; - var j1:int; - var k:int; - var ti:int; - var _0:bool; - var _1:bool; - c <- witness; - j <- 0; - while (j < 5) { - j1 <- ((j + offset) %% 5); - k <@ __rhotates (j1, j); - ti <@ __index (j1, j); - c.[j] <- a.[ti]; - c.[j] <- (c.[j] `^` d.[j1]); - (aux_1, aux_0, aux_2) <- ROL_64 c.[j] (W8.of_int k); - _0 <- aux_1; - _1 <- aux_0; - c.[j] <- aux_2; - j <- j + 1; - } - return (c); - } - - proc __set_row_scalar (r:W64.t Array25.t, row:int, c:W64.t Array5.t, - iota_0:W64.t) : W64.t Array25.t = { - var aux: int; - - var j:int; - var j1:int; - var j2:int; - var t:W64.t; - var ti:int; - - j <- 0; - while (j < 5) { - j1 <- ((j + 1) %% 5); - j2 <- ((j + 2) %% 5); - t <- ((invw c.[j1]) `&` c.[j2]); - if (((row = 0) /\ (j = 0))) { - t <- (t `^` iota_0); - } else { - - } - t <- (t `^` c.[j]); - ti <@ __index (j, row); - r.[ti] <- t; - j <- j + 1; - } - return (r); - } - - proc __round2x_scalar (a:W64.t Array25.t, r:W64.t Array25.t, iota_0:W64.t) : - W64.t Array25.t * W64.t Array25.t = { - - var c:W64.t Array5.t; - var d:W64.t Array5.t; - c <- witness; - d <- witness; - c <@ __theta_sum_scalar (a); - d <@ __theta_rol_scalar (c); - c <@ __rol_sum_scalar (d, a, 0); - r <@ __set_row_scalar (r, 0, c, iota_0); - c <@ __rol_sum_scalar (d, a, 3); - r <@ __set_row_scalar (r, 1, c, iota_0); - c <@ __rol_sum_scalar (d, a, 1); - r <@ __set_row_scalar (r, 2, c, iota_0); - c <@ __rol_sum_scalar (d, a, 4); - r <@ __set_row_scalar (r, 3, c, iota_0); - c <@ __rol_sum_scalar (d, a, 2); - r <@ __set_row_scalar (r, 4, c, iota_0); - return (a, r); - } - - proc _keccakf1600_scalar (a:W64.t Array25.t) : W64.t Array25.t = { - - var iotas_p:W64.t Array24.t; - var round:W64.t; - var iota_0:W64.t; - var round_s:W64.t; - var r:W64.t Array25.t; - iotas_p <- witness; - r <- witness; - iotas_p <- KECCAK_RC; - round <- (W64.of_int 0); - - while ((round \ult (W64.of_int 24))) { - iota_0 <- iotas_p.[(W64.to_uint round)]; - round_s <- round; - (a, r) <@ __round2x_scalar (a, r, iota_0); - round <- round_s; - round <- (round + (W64.of_int 1)); - iota_0 <- iotas_p.[(W64.to_uint round)]; - round_s <- round; - (r, a) <@ __round2x_scalar (r, a, iotas_p.[(W64.to_uint round)]); - round <- round_s; - round <- (round + (W64.of_int 1)); - } - return (a); - } - - proc __st0 (state:W64.t Array25.t) : W64.t Array25.t = { - var aux: int; - - var i:int; - - i <- 0; - while (i < 25) { - state.[i] <- (W64.of_int 0); - i <- i + 1; - } - return (state); - } - - proc __add_full_block (state:W64.t Array25.t, in_0:W64.t, inlen:W64.t, - r8:W64.t) : W64.t Array25.t * W64.t * W64.t = { - - var r64:W64.t; - var i:W64.t; - var t:W64.t; - - r64 <- r8; - r64 <- (r64 `>>` (W8.of_int 3)); - i <- (W64.of_int 0); - - while ((i \ult r64)) { - t <- (loadW64 Glob.mem (W64.to_uint (in_0 + ((W64.of_int 8) * i)))); - state.[(W64.to_uint i)] <- (state.[(W64.to_uint i)] `^` t); - i <- (i + (W64.of_int 1)); - } - in_0 <- (in_0 + r8); - inlen <- (inlen - r8); - return (state, in_0, inlen); - } - - proc __add_final_block (state:W64.t Array25.t, in_0:W64.t, inlen:W64.t, - trail_byte:W8.t, r8:W64.t) : W64.t Array25.t = { - - var inlen8:W64.t; - var i:W64.t; - var t:W64.t; - var c:W8.t; - - inlen8 <- inlen; - inlen8 <- (inlen8 `>>` (W8.of_int 3)); - i <- (W64.of_int 0); - - while ((i \ult inlen8)) { - t <- (loadW64 Glob.mem (W64.to_uint (in_0 + ((W64.of_int 8) * i)))); - state.[(W64.to_uint i)] <- (state.[(W64.to_uint i)] `^` t); - i <- (i + (W64.of_int 1)); - } - i <- (i `<<` (W8.of_int 3)); - - while ((i \ult inlen)) { - c <- (loadW8 Glob.mem (W64.to_uint (in_0 + i))); - state <- - Array25.init - (WArray200.get64 (WArray200.set8 (WArray200.init64 (fun i_0 => (state).[i_0])) (W64.to_uint i) (( - (get8 (WArray200.init64 (fun i_0 => (state).[i_0])) (W64.to_uint i)) `^` c)))); - i <- (i + (W64.of_int 1)); - } - state <- - Array25.init - (WArray200.get64 (WArray200.set8 (WArray200.init64 (fun i_0 => (state).[i_0])) (W64.to_uint i) (( - (get8 (WArray200.init64 (fun i_0 => (state).[i_0])) (W64.to_uint i)) `^` trail_byte)))); - i <- r8; - i <- (i - (W64.of_int 1)); - state <- - Array25.init - (WArray200.get64 (WArray200.set8 (WArray200.init64 (fun i_0 => (state).[i_0])) (W64.to_uint i) (( - (get8 (WArray200.init64 (fun i_0 => (state).[i_0])) (W64.to_uint i)) `^` (W8.of_int 128))))); - return (state); - } - - proc _isha3_256 (out:W8.t Array32.t, in_0:W64.t, inlen:W64.t) : W8.t Array32.t = { - var aux: int; - - var s_out:W8.t Array32.t; - var state:W64.t Array25.t; - var r8:W64.t; - var ilen:W64.t; - var s_in:W64.t; - var s_ilen:W64.t; - var s_r8:W64.t; - var t8:W8.t; - var i:int; - var t64:W64.t; - s_out <- witness; - state <- witness; - s_out <- out; - state <@ __st0 (state); - r8 <- (W64.of_int 136); - ilen <- inlen; - - while ((r8 \ule ilen)) { - (state, in_0, ilen) <@ __add_full_block (state, in_0, ilen, r8); - s_in <- in_0; - s_ilen <- ilen; - s_r8 <- r8; - state <@ _keccakf1600_scalar (state); - in_0 <- s_in; - ilen <- s_ilen; - r8 <- s_r8; - } - t8 <- (W8.of_int 6); - state <@ __add_final_block (state, in_0, ilen, t8, r8); - state <@ _keccakf1600_scalar (state); - out <- s_out; - i <- 0; - while (i < 4) { - t64 <- state.[i]; - out <- - Array32.init - (WArray32.get8 (WArray32.set64 (WArray32.init8 (fun i_0 => (out).[i_0])) i (t64))); - i <- i + 1; - } - return (out); - } - - proc _shake256_64 (out:W64.t, outlen:W64.t, in_0:W8.t Array64.t) : unit = { - var aux: int; - - var s_out:W64.t; - var s_outlen:W64.t; - var state:W64.t Array25.t; - var i:int; - var t64:W64.t; - var j:W64.t; - var c:W8.t; - state <- witness; - s_out <- out; - s_outlen <- outlen; - state <@ __st0 (state); - i <- 0; - while (i < 8) { - t64 <- (get64 (WArray64.init8 (fun i_0 => (in_0).[i_0])) i); - state.[i] <- (state.[i] `^` t64); - i <- i + 1; - } - state <- - Array25.init - (WArray200.get64 (WArray200.set8 (WArray200.init64 (fun i_0 => (state).[i_0])) 64 (( - (get8 (WArray200.init64 (fun i_0 => (state).[i_0])) 64) `^` (W8.of_int 31))))); - state <- - Array25.init - (WArray200.get64 (WArray200.set8 (WArray200.init64 (fun i_0 => (state).[i_0])) (136 - 1) (( - (get8 (WArray200.init64 (fun i_0 => (state).[i_0])) (136 - 1)) `^` (W8.of_int 128))))); - state <@ _keccakf1600_scalar (state); - outlen <- s_outlen; - out <- s_out; - - while (((W64.of_int 136) \ult outlen)) { - aux <- (136 %/ 8); - i <- 0; - while (i < aux) { - t64 <- state.[i]; - Glob.mem <- - storeW64 Glob.mem (W64.to_uint (out + (W64.of_int (8 * i)))) (t64); - i <- i + 1; - } - out <- (out + (W64.of_int 136)); - outlen <- (outlen - (W64.of_int 136)); - s_out <- out; - s_outlen <- outlen; - state <@ _keccakf1600_scalar (state); - outlen <- s_outlen; - out <- s_out; - } - s_outlen <- outlen; - outlen <- (outlen `>>` (W8.of_int 3)); - j <- (W64.of_int 0); - - while ((j \ult outlen)) { - t64 <- state.[(W64.to_uint j)]; - Glob.mem <- - storeW64 Glob.mem (W64.to_uint (out + ((W64.of_int 8) * j))) (t64); - j <- (j + (W64.of_int 1)); - } - j <- (j `<<` (W8.of_int 3)); - outlen <- s_outlen; - - while ((j \ult outlen)) { - c <- - (get8 (WArray200.init64 (fun i_0 => (state).[i_0])) (W64.to_uint j)); - Glob.mem <- storeW8 Glob.mem (W64.to_uint (out + j)) (c); - j <- (j + (W64.of_int 1)); - } - return (); - } - - proc _isha3_256_32 (out:W8.t Array32.t, in_0:W8.t Array32.t) : W8.t Array32.t = { - var aux: int; - - var s_out:W8.t Array32.t; - var state:W64.t Array25.t; - var i:int; - var t64:W64.t; - s_out <- witness; - state <- witness; - s_out <- out; - state <@ __st0 (state); - aux <- (32 %/ 8); - i <- 0; - while (i < aux) { - t64 <- (get64 (WArray32.init8 (fun i_0 => (in_0).[i_0])) i); - state.[i] <- t64; - i <- i + 1; - } - state <- - Array25.init - (WArray200.get64 (WArray200.set8 (WArray200.init64 (fun i_0 => (state).[i_0])) 32 (( - (get8 (WArray200.init64 (fun i_0 => (state).[i_0])) 32) `^` (W8.of_int 6))))); - state <- - Array25.init - (WArray200.get64 (WArray200.set8 (WArray200.init64 (fun i_0 => (state).[i_0])) (136 - 1) ((W8.of_int 128)))); - state <@ _keccakf1600_scalar (state); - out <- s_out; - i <- 0; - while (i < 4) { - t64 <- state.[i]; - out <- - Array32.init - (WArray32.get8 (WArray32.set64 (WArray32.init8 (fun i_0 => (out).[i_0])) i (t64))); - i <- i + 1; - } - return (out); - } - - proc _sha3_512_64 (out:W8.t Array64.t, in_0:W8.t Array64.t) : W8.t Array64.t = { - var aux: int; - - var state:W64.t Array25.t; - var i:int; - var t64:W64.t; - var out_s:W8.t Array64.t; - out_s <- witness; - state <- witness; - state <@ __st0 (state); - i <- 0; - while (i < 8) { - t64 <- (get64 (WArray64.init8 (fun i_0 => (in_0).[i_0])) i); - state.[i] <- (state.[i] `^` t64); - i <- i + 1; - } - state <- - Array25.init - (WArray200.get64 (WArray200.set8 (WArray200.init64 (fun i_0 => (state).[i_0])) 64 (( - (get8 (WArray200.init64 (fun i_0 => (state).[i_0])) 64) `^` (W8.of_int 6))))); - state <- - Array25.init - (WArray200.get64 (WArray200.set8 (WArray200.init64 (fun i_0 => (state).[i_0])) (72 - 1) (( - (get8 (WArray200.init64 (fun i_0 => (state).[i_0])) (72 - 1)) `^` (W8.of_int 128))))); - out_s <- out; - state <@ _keccakf1600_scalar (state); - out <- out_s; - i <- 0; - while (i < 8) { - t64 <- state.[i]; - out <- - Array64.init - (WArray64.get8 (WArray64.set64 (WArray64.init8 (fun i_0 => (out).[i_0])) i (t64))); - i <- i + 1; - } - return (out); - } - - proc _sha3_512_32 (out:W8.t Array64.t, in_0:W8.t Array32.t) : W8.t Array64.t = { - var aux: int; - - var state:W64.t Array25.t; - var i:int; - var t64:W64.t; - var out_s:W8.t Array64.t; - out_s <- witness; - state <- witness; - state <@ __st0 (state); - i <- 0; - while (i < 4) { - t64 <- (get64 (WArray32.init8 (fun i_0 => (in_0).[i_0])) i); - state.[i] <- (state.[i] `^` t64); - i <- i + 1; - } - state <- - Array25.init - (WArray200.get64 (WArray200.set8 (WArray200.init64 (fun i_0 => (state).[i_0])) 32 (( - (get8 (WArray200.init64 (fun i_0 => (state).[i_0])) 32) `^` (W8.of_int 6))))); - state <- - Array25.init - (WArray200.get64 (WArray200.set8 (WArray200.init64 (fun i_0 => (state).[i_0])) (72 - 1) (( - (get8 (WArray200.init64 (fun i_0 => (state).[i_0])) (72 - 1)) `^` (W8.of_int 128))))); - out_s <- out; - state <@ _keccakf1600_scalar (state); - out <- out_s; - i <- 0; - while (i < 8) { - t64 <- state.[i]; - out <- - Array64.init - (WArray64.get8 (WArray64.set64 (WArray64.init8 (fun i_0 => (out).[i_0])) i (t64))); - i <- i + 1; - } - return (out); - } - - proc _shake128_absorb34 (state:W64.t Array25.t, in_0:W8.t Array34.t) : - W64.t Array25.t = { - var aux: int; - - var i:int; - var t64:W64.t; - var t16:W16.t; - - state <@ __st0 (state); - i <- 0; - while (i < 4) { - t64 <- (get64 (WArray34.init8 (fun i_0 => (in_0).[i_0])) i); - state.[i] <- (state.[i] `^` t64); - i <- i + 1; - } - t16 <- (get16_direct (WArray34.init8 (fun i_0 => (in_0).[i_0])) 32); - state <- - Array25.init - (WArray200.get64 (WArray200.set16 (WArray200.init64 (fun i_0 => (state).[i_0])) 16 (( - (get16 (WArray200.init64 (fun i_0 => (state).[i_0])) 16) `^` t16)))); - state <- - Array25.init - (WArray200.get64 (WArray200.set8 (WArray200.init64 (fun i_0 => (state).[i_0])) 34 (( - (get8 (WArray200.init64 (fun i_0 => (state).[i_0])) 34) `^` (W8.of_int 31))))); - state <- - Array25.init - (WArray200.get64 (WArray200.set8 (WArray200.init64 (fun i_0 => (state).[i_0])) (168 - 1) (( - (get8 (WArray200.init64 (fun i_0 => (state).[i_0])) (168 - 1)) `^` (W8.of_int 128))))); - return (state); - } - - proc _shake128_squeezeblock (state:W64.t Array25.t, out:W8.t Array168.t) : - W64.t Array25.t * W8.t Array168.t = { - var aux: int; - - var out_s:W8.t Array168.t; - var i:int; - var t:W64.t; - out_s <- witness; - out_s <- out; - state <@ _keccakf1600_scalar (state); - out <- out_s; - aux <- (168 %/ 8); - i <- 0; - while (i < aux) { - t <- state.[i]; - out <- - Array168.init - (WArray168.get8 (WArray168.set64 (WArray168.init8 (fun i_0 => (out).[i_0])) i (t))); - i <- i + 1; - } - return (state, out); - } - - proc __rol_4u64_rho56 (a:W256.t) : W256.t = { - - var r:W256.t; - - r <- VPSHUFB_256 a rho56; - return (r); - } - - proc __rol_4u64_rho8 (a:W256.t) : W256.t = { - - var r:W256.t; - - r <- VPSHUFB_256 a rho8; - return (r); - } - - proc __rol_4u64 (a:W256.t, o:int) : W256.t = { - - var r:W256.t; - var t256:W256.t; - - r <- VPSLL_4u64 a (W8.of_int o); - t256 <- VPSRL_4u64 a (W8.of_int (64 - o)); - r <- (r `|` t256); - return (r); - } - - proc __prepare_theta (a_4x:W256.t Array25.t) : W256.t * W256.t * W256.t * - W256.t * W256.t = { - - var ca:W256.t; - var ce:W256.t; - var ci:W256.t; - var co:W256.t; - var cu:W256.t; - - ca <- a_4x.[20]; - ca <- (ca `^` a_4x.[15]); - ca <- (ca `^` a_4x.[10]); - ca <- (ca `^` a_4x.[5]); - ca <- (ca `^` a_4x.[0]); - ce <- a_4x.[21]; - ce <- (ce `^` a_4x.[16]); - ce <- (ce `^` a_4x.[11]); - ce <- (ce `^` a_4x.[6]); - ce <- (ce `^` a_4x.[1]); - ci <- a_4x.[22]; - ci <- (ci `^` a_4x.[17]); - ci <- (ci `^` a_4x.[12]); - ci <- (ci `^` a_4x.[7]); - ci <- (ci `^` a_4x.[2]); - co <- a_4x.[23]; - co <- (co `^` a_4x.[18]); - co <- (co `^` a_4x.[13]); - co <- (co `^` a_4x.[8]); - co <- (co `^` a_4x.[3]); - cu <- a_4x.[24]; - cu <- (cu `^` a_4x.[19]); - cu <- (cu `^` a_4x.[14]); - cu <- (cu `^` a_4x.[9]); - cu <- (cu `^` a_4x.[4]); - return (ca, ce, ci, co, cu); - } - - proc __first (ca:W256.t, ce:W256.t, ci:W256.t, co:W256.t, cu:W256.t) : - W256.t * W256.t * W256.t * W256.t * W256.t = { - - var da:W256.t; - var de:W256.t; - var di:W256.t; - var do_0:W256.t; - var du:W256.t; - var ce1:W256.t; - var ci1:W256.t; - var co1:W256.t; - var cu1:W256.t; - var ca1:W256.t; - - ce1 <@ __rol_4u64 (ce, 1); - da <- (cu `^` ce1); - ci1 <@ __rol_4u64 (ci, 1); - de <- (ca `^` ci1); - co1 <@ __rol_4u64 (co, 1); - di <- (ce `^` co1); - cu1 <@ __rol_4u64 (cu, 1); - do_0 <- (ci `^` cu1); - ca1 <@ __rol_4u64 (ca, 1); - du <- (co `^` ca1); - return (da, de, di, do_0, du); - } - - proc __second_even (a_4x:W256.t Array25.t, e_4x:W256.t Array25.t, - index:int, ca:W256.t, ce:W256.t, ci:W256.t, co:W256.t, - cu:W256.t, da:W256.t, de:W256.t, di:W256.t, - do_0:W256.t, du:W256.t) : W256.t Array25.t * - W256.t Array25.t * W256.t * - W256.t * W256.t * W256.t * - W256.t = { - - var t256:W256.t; - var bba:W256.t; - var bbe:W256.t; - var bbi:W256.t; - var bbo:W256.t; - var bbu:W256.t; - - t256 <- a_4x.[0]; - t256 <- (t256 `^` da); - a_4x.[0] <- t256; - bba <- t256; - t256 <- a_4x.[6]; - t256 <- (t256 `^` de); - a_4x.[6] <- t256; - bbe <@ __rol_4u64 (t256, 44); - t256 <- a_4x.[12]; - t256 <- (t256 `^` di); - a_4x.[12] <- t256; - bbi <@ __rol_4u64 (t256, 43); - t256 <- VPANDN_256 bbe bbi; - t256 <- (t256 `^` bba); - t256 <- (t256 `^` KeccakF1600RoundConstants.[index]); - e_4x.[0] <- t256; - ca <- t256; - t256 <- a_4x.[18]; - t256 <- (t256 `^` do_0); - a_4x.[18] <- t256; - bbo <@ __rol_4u64 (t256, 21); - t256 <- VPANDN_256 bbi bbo; - t256 <- (t256 `^` bbe); - e_4x.[1] <- t256; - ce <- t256; - t256 <- a_4x.[24]; - t256 <- (t256 `^` du); - a_4x.[24] <- t256; - bbu <@ __rol_4u64 (t256, 14); - t256 <- VPANDN_256 bbo bbu; - t256 <- (t256 `^` bbi); - e_4x.[2] <- t256; - ci <- t256; - t256 <- VPANDN_256 bbu bba; - t256 <- (t256 `^` bbo); - e_4x.[3] <- t256; - co <- t256; - t256 <- VPANDN_256 bba bbe; - t256 <- (t256 `^` bbu); - e_4x.[4] <- t256; - cu <- t256; - return (a_4x, e_4x, ca, ce, ci, co, cu); - } - - proc __third_even (a_4x:W256.t Array25.t, e_4x:W256.t Array25.t, ca:W256.t, - ce:W256.t, ci:W256.t, co:W256.t, cu:W256.t, da:W256.t, - de:W256.t, di:W256.t, do_0:W256.t, du:W256.t) : - W256.t Array25.t * W256.t Array25.t * W256.t * W256.t * W256.t * W256.t * - W256.t = { - - var t256:W256.t; - var bga:W256.t; - var bge:W256.t; - var bgi:W256.t; - var bgo:W256.t; - var bgu:W256.t; - - t256 <- a_4x.[3]; - t256 <- (t256 `^` do_0); - a_4x.[3] <- t256; - bga <@ __rol_4u64 (t256, 28); - t256 <- a_4x.[9]; - t256 <- (t256 `^` du); - a_4x.[9] <- t256; - bge <@ __rol_4u64 (t256, 20); - t256 <- a_4x.[10]; - t256 <- (t256 `^` da); - a_4x.[10] <- t256; - bgi <@ __rol_4u64 (t256, 3); - t256 <- VPANDN_256 bge bgi; - t256 <- (t256 `^` bga); - e_4x.[5] <- t256; - ca <- (ca `^` t256); - t256 <- a_4x.[16]; - t256 <- (t256 `^` de); - a_4x.[16] <- t256; - bgo <@ __rol_4u64 (t256, 45); - t256 <- VPANDN_256 bgi bgo; - t256 <- (t256 `^` bge); - e_4x.[6] <- t256; - ce <- (ce `^` t256); - t256 <- a_4x.[22]; - t256 <- (t256 `^` di); - a_4x.[22] <- t256; - bgu <@ __rol_4u64 (t256, 61); - t256 <- VPANDN_256 bgo bgu; - t256 <- (t256 `^` bgi); - e_4x.[7] <- t256; - ci <- (ci `^` t256); - t256 <- VPANDN_256 bgu bga; - t256 <- (t256 `^` bgo); - e_4x.[8] <- t256; - co <- (co `^` t256); - t256 <- VPANDN_256 bga bge; - t256 <- (t256 `^` bgu); - e_4x.[9] <- t256; - cu <- (cu `^` t256); - return (a_4x, e_4x, ca, ce, ci, co, cu); - } - - proc __fourth_even (a_4x:W256.t Array25.t, e_4x:W256.t Array25.t, - ca:W256.t, ce:W256.t, ci:W256.t, co:W256.t, cu:W256.t, - da:W256.t, de:W256.t, di:W256.t, do_0:W256.t, du:W256.t) : - W256.t Array25.t * W256.t Array25.t * W256.t * W256.t * W256.t * W256.t * - W256.t = { - - var t256:W256.t; - var bka:W256.t; - var bke:W256.t; - var bki:W256.t; - var bko:W256.t; - var bku:W256.t; - - t256 <- a_4x.[1]; - t256 <- (t256 `^` de); - a_4x.[1] <- t256; - bka <@ __rol_4u64 (t256, 1); - t256 <- a_4x.[7]; - t256 <- (t256 `^` di); - a_4x.[7] <- t256; - bke <@ __rol_4u64 (t256, 6); - t256 <- a_4x.[13]; - t256 <- (t256 `^` do_0); - a_4x.[13] <- t256; - bki <@ __rol_4u64 (t256, 25); - t256 <- VPANDN_256 bke bki; - t256 <- (t256 `^` bka); - e_4x.[10] <- t256; - ca <- (ca `^` t256); - t256 <- a_4x.[19]; - t256 <- (t256 `^` du); - a_4x.[19] <- t256; - bko <@ __rol_4u64_rho8 (t256); - t256 <- VPANDN_256 bki bko; - t256 <- (t256 `^` bke); - e_4x.[11] <- t256; - ce <- (ce `^` t256); - t256 <- a_4x.[20]; - t256 <- (t256 `^` da); - a_4x.[20] <- t256; - bku <@ __rol_4u64 (t256, 18); - t256 <- VPANDN_256 bko bku; - t256 <- (t256 `^` bki); - e_4x.[12] <- t256; - ci <- (ci `^` t256); - t256 <- VPANDN_256 bku bka; - t256 <- (t256 `^` bko); - e_4x.[13] <- t256; - co <- (co `^` t256); - t256 <- VPANDN_256 bka bke; - t256 <- (t256 `^` bku); - e_4x.[14] <- t256; - cu <- (cu `^` t256); - return (a_4x, e_4x, ca, ce, ci, co, cu); - } - - proc __fifth_even (a_4x:W256.t Array25.t, e_4x:W256.t Array25.t, ca:W256.t, - ce:W256.t, ci:W256.t, co:W256.t, cu:W256.t, da:W256.t, - de:W256.t, di:W256.t, do_0:W256.t, du:W256.t) : - W256.t Array25.t * W256.t Array25.t * W256.t * W256.t * W256.t * W256.t * - W256.t = { - - var t256:W256.t; - var bma:W256.t; - var bme:W256.t; - var bmi:W256.t; - var bmo:W256.t; - var bmu:W256.t; - - t256 <- a_4x.[4]; - t256 <- (t256 `^` du); - a_4x.[4] <- t256; - bma <@ __rol_4u64 (t256, 27); - t256 <- a_4x.[5]; - t256 <- (t256 `^` da); - a_4x.[5] <- t256; - bme <@ __rol_4u64 (t256, 36); - t256 <- a_4x.[11]; - t256 <- (t256 `^` de); - a_4x.[11] <- t256; - bmi <@ __rol_4u64 (t256, 10); - t256 <- VPANDN_256 bme bmi; - t256 <- (t256 `^` bma); - e_4x.[15] <- t256; - ca <- (ca `^` t256); - t256 <- a_4x.[17]; - t256 <- (t256 `^` di); - a_4x.[17] <- t256; - bmo <@ __rol_4u64 (t256, 15); - t256 <- VPANDN_256 bmi bmo; - t256 <- (t256 `^` bme); - e_4x.[16] <- t256; - ce <- (ce `^` t256); - t256 <- a_4x.[23]; - t256 <- (t256 `^` do_0); - a_4x.[23] <- t256; - bmu <@ __rol_4u64_rho56 (t256); - t256 <- VPANDN_256 bmo bmu; - t256 <- (t256 `^` bmi); - e_4x.[17] <- t256; - ci <- (ci `^` t256); - t256 <- VPANDN_256 bmu bma; - t256 <- (t256 `^` bmo); - e_4x.[18] <- t256; - co <- (co `^` t256); - t256 <- VPANDN_256 bma bme; - t256 <- (t256 `^` bmu); - e_4x.[19] <- t256; - cu <- (cu `^` t256); - return (a_4x, e_4x, ca, ce, ci, co, cu); - } - - proc __sixth_even (a_4x:W256.t Array25.t, e_4x:W256.t Array25.t, ca:W256.t, - ce:W256.t, ci:W256.t, co:W256.t, cu:W256.t, da:W256.t, - de:W256.t, di:W256.t, do_0:W256.t, du:W256.t) : - W256.t Array25.t * W256.t Array25.t * W256.t * W256.t * W256.t * W256.t * - W256.t = { - - var t256:W256.t; - var bsa:W256.t; - var bse:W256.t; - var bsi:W256.t; - var bso:W256.t; - var bsu:W256.t; - - t256 <- a_4x.[2]; - t256 <- (t256 `^` di); - a_4x.[2] <- t256; - bsa <@ __rol_4u64 (t256, 62); - t256 <- a_4x.[8]; - t256 <- (t256 `^` do_0); - a_4x.[8] <- t256; - bse <@ __rol_4u64 (t256, 55); - t256 <- a_4x.[14]; - t256 <- (t256 `^` du); - a_4x.[14] <- t256; - bsi <@ __rol_4u64 (t256, 39); - t256 <- VPANDN_256 bse bsi; - t256 <- (t256 `^` bsa); - e_4x.[20] <- t256; - ca <- (ca `^` t256); - t256 <- a_4x.[15]; - t256 <- (t256 `^` da); - a_4x.[15] <- t256; - bso <@ __rol_4u64 (t256, 41); - t256 <- VPANDN_256 bsi bso; - t256 <- (t256 `^` bse); - e_4x.[21] <- t256; - ce <- (ce `^` t256); - t256 <- a_4x.[21]; - t256 <- (t256 `^` de); - a_4x.[21] <- t256; - bsu <@ __rol_4u64 (t256, 2); - t256 <- VPANDN_256 bso bsu; - t256 <- (t256 `^` bsi); - e_4x.[22] <- t256; - ci <- (ci `^` t256); - t256 <- VPANDN_256 bsu bsa; - t256 <- (t256 `^` bso); - e_4x.[23] <- t256; - co <- (co `^` t256); - t256 <- VPANDN_256 bsa bse; - t256 <- (t256 `^` bsu); - e_4x.[24] <- t256; - cu <- (cu `^` t256); - return (a_4x, e_4x, ca, ce, ci, co, cu); - } - - proc __second_odd (a_4x:W256.t Array25.t, e_4x:W256.t Array25.t, index:int, - ca:W256.t, ce:W256.t, ci:W256.t, co:W256.t, cu:W256.t, - da:W256.t, de:W256.t, di:W256.t, do_0:W256.t, du:W256.t) : - W256.t Array25.t * W256.t Array25.t * W256.t * W256.t * W256.t * W256.t * - W256.t = { - - var t256:W256.t; - var bba:W256.t; - var bbe:W256.t; - var bbi:W256.t; - var bbo:W256.t; - var bbu:W256.t; - - t256 <- a_4x.[0]; - t256 <- (t256 `^` da); - a_4x.[0] <- t256; - bba <- t256; - t256 <- a_4x.[6]; - t256 <- (t256 `^` de); - a_4x.[6] <- t256; - bbe <@ __rol_4u64 (t256, 44); - t256 <- a_4x.[12]; - t256 <- (t256 `^` di); - a_4x.[12] <- t256; - bbi <@ __rol_4u64 (t256, 43); - t256 <- VPANDN_256 bbe bbi; - t256 <- (t256 `^` bba); - t256 <- (t256 `^` KeccakF1600RoundConstants.[index]); - e_4x.[0] <- t256; - ca <- t256; - t256 <- a_4x.[18]; - t256 <- (t256 `^` do_0); - a_4x.[18] <- t256; - bbo <@ __rol_4u64 (t256, 21); - t256 <- VPANDN_256 bbi bbo; - t256 <- (t256 `^` bbe); - e_4x.[1] <- t256; - ce <- t256; - t256 <- a_4x.[24]; - t256 <- (t256 `^` du); - a_4x.[24] <- t256; - bbu <@ __rol_4u64 (t256, 14); - t256 <- VPANDN_256 bbo bbu; - t256 <- (t256 `^` bbi); - e_4x.[2] <- t256; - ci <- t256; - t256 <- VPANDN_256 bbu bba; - t256 <- (t256 `^` bbo); - e_4x.[3] <- t256; - co <- t256; - t256 <- VPANDN_256 bba bbe; - t256 <- (t256 `^` bbu); - e_4x.[4] <- t256; - cu <- t256; - return (a_4x, e_4x, ca, ce, ci, co, cu); - } - - proc __third_odd (a_4x:W256.t Array25.t, e_4x:W256.t Array25.t, ca:W256.t, - ce:W256.t, ci:W256.t, co:W256.t, cu:W256.t, da:W256.t, - de:W256.t, di:W256.t, do_0:W256.t, du:W256.t) : W256.t Array25.t * - W256.t Array25.t * - W256.t * - W256.t * - W256.t * - W256.t * - W256.t = { - - var t256:W256.t; - var bga:W256.t; - var bge:W256.t; - var bgi:W256.t; - var bgo:W256.t; - var bgu:W256.t; - - t256 <- a_4x.[3]; - t256 <- (t256 `^` do_0); - a_4x.[3] <- t256; - bga <@ __rol_4u64 (t256, 28); - t256 <- a_4x.[9]; - t256 <- (t256 `^` du); - a_4x.[9] <- t256; - bge <@ __rol_4u64 (t256, 20); - t256 <- a_4x.[10]; - t256 <- (t256 `^` da); - a_4x.[10] <- t256; - bgi <@ __rol_4u64 (t256, 3); - t256 <- VPANDN_256 bge bgi; - t256 <- (t256 `^` bga); - e_4x.[5] <- t256; - ca <- (ca `^` t256); - t256 <- a_4x.[16]; - t256 <- (t256 `^` de); - a_4x.[16] <- t256; - bgo <@ __rol_4u64 (t256, 45); - t256 <- VPANDN_256 bgi bgo; - t256 <- (t256 `^` bge); - e_4x.[6] <- t256; - ce <- (ce `^` t256); - t256 <- a_4x.[22]; - t256 <- (t256 `^` di); - a_4x.[22] <- t256; - bgu <@ __rol_4u64 (t256, 61); - t256 <- VPANDN_256 bgo bgu; - t256 <- (t256 `^` bgi); - e_4x.[7] <- t256; - ci <- (ci `^` t256); - t256 <- VPANDN_256 bgu bga; - t256 <- (t256 `^` bgo); - e_4x.[8] <- t256; - co <- (co `^` t256); - t256 <- VPANDN_256 bga bge; - t256 <- (t256 `^` bgu); - e_4x.[9] <- t256; - cu <- (cu `^` t256); - return (a_4x, e_4x, ca, ce, ci, co, cu); - } - - proc __fourth_odd (a_4x:W256.t Array25.t, e_4x:W256.t Array25.t, ca:W256.t, - ce:W256.t, ci:W256.t, co:W256.t, cu:W256.t, da:W256.t, - de:W256.t, di:W256.t, do_0:W256.t, du:W256.t) : - W256.t Array25.t * W256.t Array25.t * W256.t * W256.t * W256.t * W256.t * - W256.t = { - - var t256:W256.t; - var bka:W256.t; - var bke:W256.t; - var bki:W256.t; - var bko:W256.t; - var bku:W256.t; - - t256 <- a_4x.[1]; - t256 <- (t256 `^` de); - a_4x.[1] <- t256; - bka <@ __rol_4u64 (t256, 1); - t256 <- a_4x.[7]; - t256 <- (t256 `^` di); - a_4x.[7] <- t256; - bke <@ __rol_4u64 (t256, 6); - t256 <- a_4x.[13]; - t256 <- (t256 `^` do_0); - a_4x.[13] <- t256; - bki <@ __rol_4u64 (t256, 25); - t256 <- VPANDN_256 bke bki; - t256 <- (t256 `^` bka); - e_4x.[10] <- t256; - ca <- (ca `^` t256); - t256 <- a_4x.[19]; - t256 <- (t256 `^` du); - a_4x.[19] <- t256; - bko <@ __rol_4u64_rho8 (t256); - t256 <- VPANDN_256 bki bko; - t256 <- (t256 `^` bke); - e_4x.[11] <- t256; - ce <- (ce `^` t256); - t256 <- a_4x.[20]; - t256 <- (t256 `^` da); - a_4x.[20] <- t256; - bku <@ __rol_4u64 (t256, 18); - t256 <- VPANDN_256 bko bku; - t256 <- (t256 `^` bki); - e_4x.[12] <- t256; - ci <- (ci `^` t256); - t256 <- VPANDN_256 bku bka; - t256 <- (t256 `^` bko); - e_4x.[13] <- t256; - co <- (co `^` t256); - t256 <- VPANDN_256 bka bke; - t256 <- (t256 `^` bku); - e_4x.[14] <- t256; - cu <- (cu `^` t256); - return (a_4x, e_4x, ca, ce, ci, co, cu); - } - - proc __fifth_odd (a_4x:W256.t Array25.t, e_4x:W256.t Array25.t, ca:W256.t, - ce:W256.t, ci:W256.t, co:W256.t, cu:W256.t, da:W256.t, - de:W256.t, di:W256.t, do_0:W256.t, du:W256.t) : W256.t Array25.t * - W256.t Array25.t * - W256.t * - W256.t * - W256.t * - W256.t * - W256.t = { - - var t256:W256.t; - var bma:W256.t; - var bme:W256.t; - var bmi:W256.t; - var bmo:W256.t; - var bmu:W256.t; - - t256 <- a_4x.[4]; - t256 <- (t256 `^` du); - a_4x.[4] <- t256; - bma <@ __rol_4u64 (t256, 27); - t256 <- a_4x.[5]; - t256 <- (t256 `^` da); - a_4x.[5] <- t256; - bme <@ __rol_4u64 (t256, 36); - t256 <- a_4x.[11]; - t256 <- (t256 `^` de); - a_4x.[11] <- t256; - bmi <@ __rol_4u64 (t256, 10); - t256 <- VPANDN_256 bme bmi; - t256 <- (t256 `^` bma); - e_4x.[15] <- t256; - ca <- (ca `^` t256); - t256 <- a_4x.[17]; - t256 <- (t256 `^` di); - a_4x.[17] <- t256; - bmo <@ __rol_4u64 (t256, 15); - t256 <- VPANDN_256 bmi bmo; - t256 <- (t256 `^` bme); - e_4x.[16] <- t256; - ce <- (ce `^` t256); - t256 <- a_4x.[23]; - t256 <- (t256 `^` do_0); - a_4x.[23] <- t256; - bmu <@ __rol_4u64_rho56 (t256); - t256 <- VPANDN_256 bmo bmu; - t256 <- (t256 `^` bmi); - e_4x.[17] <- t256; - ci <- (ci `^` t256); - t256 <- VPANDN_256 bmu bma; - t256 <- (t256 `^` bmo); - e_4x.[18] <- t256; - co <- (co `^` t256); - t256 <- VPANDN_256 bma bme; - t256 <- (t256 `^` bmu); - e_4x.[19] <- t256; - cu <- (cu `^` t256); - return (a_4x, e_4x, ca, ce, ci, co, cu); - } - - proc __sixth_odd (a_4x:W256.t Array25.t, e_4x:W256.t Array25.t, ca:W256.t, - ce:W256.t, ci:W256.t, co:W256.t, cu:W256.t, da:W256.t, - de:W256.t, di:W256.t, do_0:W256.t, du:W256.t) : W256.t Array25.t * - W256.t Array25.t * - W256.t * - W256.t * - W256.t * - W256.t * - W256.t = { - - var t256:W256.t; - var bsa:W256.t; - var bse:W256.t; - var bsi:W256.t; - var bso:W256.t; - var bsu:W256.t; - - t256 <- a_4x.[2]; - t256 <- (t256 `^` di); - a_4x.[2] <- t256; - bsa <@ __rol_4u64 (t256, 62); - t256 <- a_4x.[8]; - t256 <- (t256 `^` do_0); - a_4x.[8] <- t256; - bse <@ __rol_4u64 (t256, 55); - t256 <- a_4x.[14]; - t256 <- (t256 `^` du); - a_4x.[14] <- t256; - bsi <@ __rol_4u64 (t256, 39); - t256 <- VPANDN_256 bse bsi; - t256 <- (t256 `^` bsa); - e_4x.[20] <- t256; - ca <- (ca `^` t256); - t256 <- a_4x.[15]; - t256 <- (t256 `^` da); - a_4x.[15] <- t256; - bso <@ __rol_4u64 (t256, 41); - t256 <- VPANDN_256 bsi bso; - t256 <- (t256 `^` bse); - e_4x.[21] <- t256; - ce <- (ce `^` t256); - t256 <- a_4x.[21]; - t256 <- (t256 `^` de); - a_4x.[21] <- t256; - bsu <@ __rol_4u64 (t256, 2); - t256 <- VPANDN_256 bso bsu; - t256 <- (t256 `^` bsi); - e_4x.[22] <- t256; - ci <- (ci `^` t256); - t256 <- VPANDN_256 bsu bsa; - t256 <- (t256 `^` bso); - e_4x.[23] <- t256; - co <- (co `^` t256); - t256 <- VPANDN_256 bsa bse; - t256 <- (t256 `^` bsu); - e_4x.[24] <- t256; - cu <- (cu `^` t256); - return (a_4x, e_4x, ca, ce, ci, co, cu); - } - - proc __second_last (a_4x:W256.t Array25.t, e_4x:W256.t Array25.t, - index:int, da:W256.t, de:W256.t, di:W256.t, - do_0:W256.t, du:W256.t) : W256.t Array25.t * - W256.t Array25.t = { - - var t256:W256.t; - var bba:W256.t; - var bbe:W256.t; - var bbi:W256.t; - var bbo:W256.t; - var bbu:W256.t; - - t256 <- a_4x.[0]; - t256 <- (t256 `^` da); - a_4x.[0] <- t256; - bba <- t256; - t256 <- a_4x.[6]; - t256 <- (t256 `^` de); - a_4x.[6] <- t256; - bbe <@ __rol_4u64 (t256, 44); - t256 <- a_4x.[12]; - t256 <- (t256 `^` di); - a_4x.[12] <- t256; - bbi <@ __rol_4u64 (t256, 43); - t256 <- VPANDN_256 bbe bbi; - t256 <- (t256 `^` bba); - t256 <- (t256 `^` KeccakF1600RoundConstants.[index]); - e_4x.[0] <- t256; - t256 <- a_4x.[18]; - t256 <- (t256 `^` do_0); - a_4x.[18] <- t256; - bbo <@ __rol_4u64 (t256, 21); - t256 <- VPANDN_256 bbi bbo; - t256 <- (t256 `^` bbe); - e_4x.[1] <- t256; - t256 <- a_4x.[24]; - t256 <- (t256 `^` du); - a_4x.[24] <- t256; - bbu <@ __rol_4u64 (t256, 14); - t256 <- VPANDN_256 bbo bbu; - t256 <- (t256 `^` bbi); - e_4x.[2] <- t256; - t256 <- VPANDN_256 bbu bba; - t256 <- (t256 `^` bbo); - e_4x.[3] <- t256; - t256 <- VPANDN_256 bba bbe; - t256 <- (t256 `^` bbu); - e_4x.[4] <- t256; - return (a_4x, e_4x); - } - - proc __third_last (a_4x:W256.t Array25.t, e_4x:W256.t Array25.t, da:W256.t, - de:W256.t, di:W256.t, do_0:W256.t, du:W256.t) : - W256.t Array25.t * W256.t Array25.t = { - - var t256:W256.t; - var bga:W256.t; - var bge:W256.t; - var bgi:W256.t; - var bgo:W256.t; - var bgu:W256.t; - - t256 <- a_4x.[3]; - t256 <- (t256 `^` do_0); - a_4x.[3] <- t256; - bga <@ __rol_4u64 (t256, 28); - t256 <- a_4x.[9]; - t256 <- (t256 `^` du); - a_4x.[9] <- t256; - bge <@ __rol_4u64 (t256, 20); - t256 <- a_4x.[10]; - t256 <- (t256 `^` da); - a_4x.[10] <- t256; - bgi <@ __rol_4u64 (t256, 3); - t256 <- VPANDN_256 bge bgi; - t256 <- (t256 `^` bga); - e_4x.[5] <- t256; - t256 <- a_4x.[16]; - t256 <- (t256 `^` de); - a_4x.[16] <- t256; - bgo <@ __rol_4u64 (t256, 45); - t256 <- VPANDN_256 bgi bgo; - t256 <- (t256 `^` bge); - e_4x.[6] <- t256; - t256 <- a_4x.[22]; - t256 <- (t256 `^` di); - a_4x.[22] <- t256; - bgu <@ __rol_4u64 (t256, 61); - t256 <- VPANDN_256 bgo bgu; - t256 <- (t256 `^` bgi); - e_4x.[7] <- t256; - t256 <- VPANDN_256 bgu bga; - t256 <- (t256 `^` bgo); - e_4x.[8] <- t256; - t256 <- VPANDN_256 bga bge; - t256 <- (t256 `^` bgu); - e_4x.[9] <- t256; - return (a_4x, e_4x); - } - - proc __fourth_last (a_4x:W256.t Array25.t, e_4x:W256.t Array25.t, - da:W256.t, de:W256.t, di:W256.t, do_0:W256.t, du:W256.t) : - W256.t Array25.t * W256.t Array25.t = { - - var t256:W256.t; - var bka:W256.t; - var bke:W256.t; - var bki:W256.t; - var bko:W256.t; - var bku:W256.t; - - t256 <- a_4x.[1]; - t256 <- (t256 `^` de); - a_4x.[1] <- t256; - bka <@ __rol_4u64 (t256, 1); - t256 <- a_4x.[7]; - t256 <- (t256 `^` di); - a_4x.[7] <- t256; - bke <@ __rol_4u64 (t256, 6); - t256 <- a_4x.[13]; - t256 <- (t256 `^` do_0); - a_4x.[13] <- t256; - bki <@ __rol_4u64 (t256, 25); - t256 <- VPANDN_256 bke bki; - t256 <- (t256 `^` bka); - e_4x.[10] <- t256; - t256 <- a_4x.[19]; - t256 <- (t256 `^` du); - a_4x.[19] <- t256; - bko <@ __rol_4u64_rho8 (t256); - t256 <- VPANDN_256 bki bko; - t256 <- (t256 `^` bke); - e_4x.[11] <- t256; - t256 <- a_4x.[20]; - t256 <- (t256 `^` da); - a_4x.[20] <- t256; - bku <@ __rol_4u64 (t256, 18); - t256 <- VPANDN_256 bko bku; - t256 <- (t256 `^` bki); - e_4x.[12] <- t256; - t256 <- VPANDN_256 bku bka; - t256 <- (t256 `^` bko); - e_4x.[13] <- t256; - t256 <- VPANDN_256 bka bke; - t256 <- (t256 `^` bku); - e_4x.[14] <- t256; - return (a_4x, e_4x); - } - - proc __fifth_last (a_4x:W256.t Array25.t, e_4x:W256.t Array25.t, da:W256.t, - de:W256.t, di:W256.t, do_0:W256.t, du:W256.t) : - W256.t Array25.t * W256.t Array25.t = { - - var t256:W256.t; - var bma:W256.t; - var bme:W256.t; - var bmi:W256.t; - var bmo:W256.t; - var bmu:W256.t; - - t256 <- a_4x.[4]; - t256 <- (t256 `^` du); - a_4x.[4] <- t256; - bma <@ __rol_4u64 (t256, 27); - t256 <- a_4x.[5]; - t256 <- (t256 `^` da); - a_4x.[5] <- t256; - bme <@ __rol_4u64 (t256, 36); - t256 <- a_4x.[11]; - t256 <- (t256 `^` de); - a_4x.[11] <- t256; - bmi <@ __rol_4u64 (t256, 10); - t256 <- VPANDN_256 bme bmi; - t256 <- (t256 `^` bma); - e_4x.[15] <- t256; - t256 <- a_4x.[17]; - t256 <- (t256 `^` di); - a_4x.[17] <- t256; - bmo <@ __rol_4u64 (t256, 15); - t256 <- VPANDN_256 bmi bmo; - t256 <- (t256 `^` bme); - e_4x.[16] <- t256; - t256 <- a_4x.[23]; - t256 <- (t256 `^` do_0); - a_4x.[23] <- t256; - bmu <@ __rol_4u64_rho56 (t256); - t256 <- VPANDN_256 bmo bmu; - t256 <- (t256 `^` bmi); - e_4x.[17] <- t256; - t256 <- VPANDN_256 bmu bma; - t256 <- (t256 `^` bmo); - e_4x.[18] <- t256; - t256 <- VPANDN_256 bma bme; - t256 <- (t256 `^` bmu); - e_4x.[19] <- t256; - return (a_4x, e_4x); - } - - proc __sixth_last (a_4x:W256.t Array25.t, e_4x:W256.t Array25.t, da:W256.t, - de:W256.t, di:W256.t, do_0:W256.t, du:W256.t) : - W256.t Array25.t * W256.t Array25.t = { - - var t256:W256.t; - var bsa:W256.t; - var bse:W256.t; - var bsi:W256.t; - var bso:W256.t; - var bsu:W256.t; - - t256 <- a_4x.[2]; - t256 <- (t256 `^` di); - a_4x.[2] <- t256; - bsa <@ __rol_4u64 (t256, 62); - t256 <- a_4x.[8]; - t256 <- (t256 `^` do_0); - a_4x.[8] <- t256; - bse <@ __rol_4u64 (t256, 55); - t256 <- a_4x.[14]; - t256 <- (t256 `^` du); - a_4x.[14] <- t256; - bsi <@ __rol_4u64 (t256, 39); - t256 <- VPANDN_256 bse bsi; - t256 <- (t256 `^` bsa); - e_4x.[20] <- t256; - t256 <- a_4x.[15]; - t256 <- (t256 `^` da); - a_4x.[15] <- t256; - bso <@ __rol_4u64 (t256, 41); - t256 <- VPANDN_256 bsi bso; - t256 <- (t256 `^` bse); - e_4x.[21] <- t256; - t256 <- a_4x.[21]; - t256 <- (t256 `^` de); - a_4x.[21] <- t256; - bsu <@ __rol_4u64 (t256, 2); - t256 <- VPANDN_256 bso bsu; - t256 <- (t256 `^` bsi); - e_4x.[22] <- t256; - t256 <- VPANDN_256 bsu bsa; - t256 <- (t256 `^` bso); - e_4x.[23] <- t256; - t256 <- VPANDN_256 bsa bse; - t256 <- (t256 `^` bsu); - e_4x.[24] <- t256; - return (a_4x, e_4x); - } - - proc __theta_rho_pi_chi_iota_prepare_theta_even (a_4x:W256.t Array25.t, - e_4x:W256.t Array25.t, - index:int, ca:W256.t, - ce:W256.t, ci:W256.t, - co:W256.t, cu:W256.t) : - W256.t Array25.t * W256.t Array25.t * W256.t * W256.t * W256.t * W256.t * - W256.t = { - - var da:W256.t; - var de:W256.t; - var di:W256.t; - var do_0:W256.t; - var du:W256.t; - - (da, de, di, do_0, du) <@ __first (ca, ce, ci, co, cu); - (a_4x, e_4x, ca, ce, ci, co, cu) <@ __second_even (a_4x, e_4x, index, ca, - ce, ci, co, cu, da, de, di, do_0, du); - (a_4x, e_4x, ca, ce, ci, co, cu) <@ __third_even (a_4x, e_4x, ca, ce, ci, - co, cu, da, de, di, do_0, du); - (a_4x, e_4x, ca, ce, ci, co, cu) <@ __fourth_even (a_4x, e_4x, ca, ce, - ci, co, cu, da, de, di, do_0, du); - (a_4x, e_4x, ca, ce, ci, co, cu) <@ __fifth_even (a_4x, e_4x, ca, ce, ci, - co, cu, da, de, di, do_0, du); - (a_4x, e_4x, ca, ce, ci, co, cu) <@ __sixth_even (a_4x, e_4x, ca, ce, ci, - co, cu, da, de, di, do_0, du); - return (a_4x, e_4x, ca, ce, ci, co, cu); - } - - proc __theta_rho_pi_chi_iota_prepare_theta_odd (a_4x:W256.t Array25.t, - e_4x:W256.t Array25.t, - index:int, ca:W256.t, - ce:W256.t, ci:W256.t, - co:W256.t, cu:W256.t) : - W256.t Array25.t * W256.t Array25.t * W256.t * W256.t * W256.t * W256.t * - W256.t = { - - var da:W256.t; - var de:W256.t; - var di:W256.t; - var do_0:W256.t; - var du:W256.t; - - (da, de, di, do_0, du) <@ __first (ca, ce, ci, co, cu); - (a_4x, e_4x, ca, ce, ci, co, cu) <@ __second_odd (a_4x, e_4x, index, ca, - ce, ci, co, cu, da, de, di, do_0, du); - (a_4x, e_4x, ca, ce, ci, co, cu) <@ __third_odd (a_4x, e_4x, ca, ce, ci, - co, cu, da, de, di, do_0, du); - (a_4x, e_4x, ca, ce, ci, co, cu) <@ __fourth_odd (a_4x, e_4x, ca, ce, ci, - co, cu, da, de, di, do_0, du); - (a_4x, e_4x, ca, ce, ci, co, cu) <@ __fifth_odd (a_4x, e_4x, ca, ce, ci, - co, cu, da, de, di, do_0, du); - (a_4x, e_4x, ca, ce, ci, co, cu) <@ __sixth_odd (a_4x, e_4x, ca, ce, ci, - co, cu, da, de, di, do_0, du); - return (a_4x, e_4x, ca, ce, ci, co, cu); - } - - proc __theta_rho_pi_chi_iota (a_4x:W256.t Array25.t, e_4x:W256.t Array25.t, - index:int, ca:W256.t, ce:W256.t, ci:W256.t, - co:W256.t, cu:W256.t) : W256.t Array25.t * - W256.t Array25.t = { - - var da:W256.t; - var de:W256.t; - var di:W256.t; - var do_0:W256.t; - var du:W256.t; - - (da, de, di, do_0, du) <@ __first (ca, ce, ci, co, cu); - (a_4x, e_4x) <@ __second_last (a_4x, e_4x, index, da, de, di, do_0, du); - (a_4x, e_4x) <@ __third_last (a_4x, e_4x, da, de, di, do_0, du); - (a_4x, e_4x) <@ __fourth_last (a_4x, e_4x, da, de, di, do_0, du); - (a_4x, e_4x) <@ __fifth_last (a_4x, e_4x, da, de, di, do_0, du); - (a_4x, e_4x) <@ __sixth_last (a_4x, e_4x, da, de, di, do_0, du); - return (a_4x, e_4x); - } - - proc _KeccakF1600_StatePermute4x (a_4x:W256.t Array25.t) : W256.t Array25.t = { - - var ca:W256.t; - var ce:W256.t; - var ci:W256.t; - var co:W256.t; - var cu:W256.t; - var e_4x:W256.t Array25.t; - e_4x <- witness; - (ca, ce, ci, co, cu) <@ __prepare_theta (a_4x); - (a_4x, e_4x, ca, ce, ci, co, - cu) <@ __theta_rho_pi_chi_iota_prepare_theta_even (a_4x, e_4x, 0, ca, ce, - ci, co, cu); - (e_4x, a_4x, ca, ce, ci, co, - cu) <@ __theta_rho_pi_chi_iota_prepare_theta_odd (e_4x, a_4x, 1, ca, ce, - ci, co, cu); - (a_4x, e_4x, ca, ce, ci, co, - cu) <@ __theta_rho_pi_chi_iota_prepare_theta_even (a_4x, e_4x, 2, ca, ce, - ci, co, cu); - (e_4x, a_4x, ca, ce, ci, co, - cu) <@ __theta_rho_pi_chi_iota_prepare_theta_odd (e_4x, a_4x, 3, ca, ce, - ci, co, cu); - (a_4x, e_4x, ca, ce, ci, co, - cu) <@ __theta_rho_pi_chi_iota_prepare_theta_even (a_4x, e_4x, 4, ca, ce, - ci, co, cu); - (e_4x, a_4x, ca, ce, ci, co, - cu) <@ __theta_rho_pi_chi_iota_prepare_theta_odd (e_4x, a_4x, 5, ca, ce, - ci, co, cu); - (a_4x, e_4x, ca, ce, ci, co, - cu) <@ __theta_rho_pi_chi_iota_prepare_theta_even (a_4x, e_4x, 6, ca, ce, - ci, co, cu); - (e_4x, a_4x, ca, ce, ci, co, - cu) <@ __theta_rho_pi_chi_iota_prepare_theta_odd (e_4x, a_4x, 7, ca, ce, - ci, co, cu); - (a_4x, e_4x, ca, ce, ci, co, - cu) <@ __theta_rho_pi_chi_iota_prepare_theta_even (a_4x, e_4x, 8, ca, ce, - ci, co, cu); - (e_4x, a_4x, ca, ce, ci, co, - cu) <@ __theta_rho_pi_chi_iota_prepare_theta_odd (e_4x, a_4x, 9, ca, ce, - ci, co, cu); - (a_4x, e_4x, ca, ce, ci, co, - cu) <@ __theta_rho_pi_chi_iota_prepare_theta_even (a_4x, e_4x, 10, ca, - ce, ci, co, cu); - (e_4x, a_4x, ca, ce, ci, co, - cu) <@ __theta_rho_pi_chi_iota_prepare_theta_odd (e_4x, a_4x, 11, ca, ce, - ci, co, cu); - (a_4x, e_4x, ca, ce, ci, co, - cu) <@ __theta_rho_pi_chi_iota_prepare_theta_even (a_4x, e_4x, 12, ca, - ce, ci, co, cu); - (e_4x, a_4x, ca, ce, ci, co, - cu) <@ __theta_rho_pi_chi_iota_prepare_theta_odd (e_4x, a_4x, 13, ca, ce, - ci, co, cu); - (a_4x, e_4x, ca, ce, ci, co, - cu) <@ __theta_rho_pi_chi_iota_prepare_theta_even (a_4x, e_4x, 14, ca, - ce, ci, co, cu); - (e_4x, a_4x, ca, ce, ci, co, - cu) <@ __theta_rho_pi_chi_iota_prepare_theta_odd (e_4x, a_4x, 15, ca, ce, - ci, co, cu); - (a_4x, e_4x, ca, ce, ci, co, - cu) <@ __theta_rho_pi_chi_iota_prepare_theta_even (a_4x, e_4x, 16, ca, - ce, ci, co, cu); - (e_4x, a_4x, ca, ce, ci, co, - cu) <@ __theta_rho_pi_chi_iota_prepare_theta_odd (e_4x, a_4x, 17, ca, ce, - ci, co, cu); - (a_4x, e_4x, ca, ce, ci, co, - cu) <@ __theta_rho_pi_chi_iota_prepare_theta_even (a_4x, e_4x, 18, ca, - ce, ci, co, cu); - (e_4x, a_4x, ca, ce, ci, co, - cu) <@ __theta_rho_pi_chi_iota_prepare_theta_odd (e_4x, a_4x, 19, ca, ce, - ci, co, cu); - (a_4x, e_4x, ca, ce, ci, co, - cu) <@ __theta_rho_pi_chi_iota_prepare_theta_even (a_4x, e_4x, 20, ca, - ce, ci, co, cu); - (e_4x, a_4x, ca, ce, ci, co, - cu) <@ __theta_rho_pi_chi_iota_prepare_theta_odd (e_4x, a_4x, 21, ca, ce, - ci, co, cu); - (a_4x, e_4x, ca, ce, ci, co, - cu) <@ __theta_rho_pi_chi_iota_prepare_theta_even (a_4x, e_4x, 22, ca, - ce, ci, co, cu); - (e_4x, a_4x) <@ __theta_rho_pi_chi_iota (e_4x, a_4x, 23, ca, ce, ci, co, - cu); - return (a_4x); - } - - proc _shake256_absorb4x_33 (s:W256.t Array25.t, m0:W8.t Array33.t, - m1:W8.t Array33.t, m2:W8.t Array33.t, - m3:W8.t Array33.t) : W256.t Array25.t = { - var aux: int; - - var t0:W256.t; - var i:int; - var t64:W64.t; - var t8:W8.t; - var t1:W256.t; - - i <- 0; - while (i < 25) { - t0 <- set0_256 ; - s.[i] <- t0; - i <- i + 1; - } - i <- 0; - while (i < 4) { - t64 <- (get64 (WArray33.init8 (fun i_0 => (m0).[i_0])) i); - s <- - Array25.init - (WArray800.get256 (WArray800.set64 (WArray800.init256 (fun i_0 => (s).[i_0])) (4 * i) (( - (get64 (WArray800.init256 (fun i_0 => (s).[i_0])) (4 * i)) `^` t64)))); - t64 <- (get64 (WArray33.init8 (fun i_0 => (m1).[i_0])) i); - s <- - Array25.init - (WArray800.get256 (WArray800.set64 (WArray800.init256 (fun i_0 => (s).[i_0])) ((4 * i) + 1) (( - (get64 (WArray800.init256 (fun i_0 => (s).[i_0])) ((4 * i) + 1)) `^` t64)))); - t64 <- (get64 (WArray33.init8 (fun i_0 => (m2).[i_0])) i); - s <- - Array25.init - (WArray800.get256 (WArray800.set64 (WArray800.init256 (fun i_0 => (s).[i_0])) ((4 * i) + 2) (( - (get64 (WArray800.init256 (fun i_0 => (s).[i_0])) ((4 * i) + 2)) `^` t64)))); - t64 <- (get64 (WArray33.init8 (fun i_0 => (m3).[i_0])) i); - s <- - Array25.init - (WArray800.get256 (WArray800.set64 (WArray800.init256 (fun i_0 => (s).[i_0])) ((4 * i) + 3) (( - (get64 (WArray800.init256 (fun i_0 => (s).[i_0])) ((4 * i) + 3)) `^` t64)))); - i <- i + 1; - } - t8 <- m0.[32]; - s <- - Array25.init - (WArray800.get256 (WArray800.set8 (WArray800.init256 (fun i_0 => (s).[i_0])) 128 (( - (get8 (WArray800.init256 (fun i_0 => (s).[i_0])) 128) `^` t8)))); - s <- - Array25.init - (WArray800.get256 (WArray800.set8 (WArray800.init256 (fun i_0 => (s).[i_0])) 129 (( - (get8 (WArray800.init256 (fun i_0 => (s).[i_0])) 129) `^` (W8.of_int 31))))); - t8 <- m1.[32]; - s <- - Array25.init - (WArray800.get256 (WArray800.set8 (WArray800.init256 (fun i_0 => (s).[i_0])) 136 (( - (get8 (WArray800.init256 (fun i_0 => (s).[i_0])) 136) `^` t8)))); - s <- - Array25.init - (WArray800.get256 (WArray800.set8 (WArray800.init256 (fun i_0 => (s).[i_0])) 137 (( - (get8 (WArray800.init256 (fun i_0 => (s).[i_0])) 137) `^` (W8.of_int 31))))); - t8 <- m2.[32]; - s <- - Array25.init - (WArray800.get256 (WArray800.set8 (WArray800.init256 (fun i_0 => (s).[i_0])) 144 (( - (get8 (WArray800.init256 (fun i_0 => (s).[i_0])) 144) `^` t8)))); - s <- - Array25.init - (WArray800.get256 (WArray800.set8 (WArray800.init256 (fun i_0 => (s).[i_0])) 145 (( - (get8 (WArray800.init256 (fun i_0 => (s).[i_0])) 145) `^` (W8.of_int 31))))); - t8 <- m3.[32]; - s <- - Array25.init - (WArray800.get256 (WArray800.set8 (WArray800.init256 (fun i_0 => (s).[i_0])) 152 (( - (get8 (WArray800.init256 (fun i_0 => (s).[i_0])) 152) `^` t8)))); - s <- - Array25.init - (WArray800.get256 (WArray800.set8 (WArray800.init256 (fun i_0 => (s).[i_0])) 153 (( - (get8 (WArray800.init256 (fun i_0 => (s).[i_0])) 153) `^` (W8.of_int 31))))); - t0 <- (get256 (WArray32.init64 (fun i_0 => (shake_sep).[i_0])) 0); - t1 <- s.[((136 %/ 8) - 1)]; - t0 <- (t0 `^` t1); - s.[((136 %/ 8) - 1)] <- t0; - return (s); - } - - proc __shake256_squeezeblock4x (state:W256.t Array25.t, h0:W8.t Array136.t, - h1:W8.t Array136.t, h2:W8.t Array136.t, - h3:W8.t Array136.t) : W256.t Array25.t * - W8.t Array136.t * - W8.t Array136.t * - W8.t Array136.t * - W8.t Array136.t = { - var aux: int; - - var i:int; - var t256:W256.t; - var t128:W128.t; - - state <@ _KeccakF1600_StatePermute4x (state); - aux <- (136 %/ 8); - i <- 0; - while (i < aux) { - t256 <- state.[i]; - t128 <- (truncateu128 t256); - h0 <- - Array136.init - (WArray136.get8 (WArray136.set64 (WArray136.init8 (fun i_0 => (h0).[i_0])) i (VMOVLPD t128))); - h1 <- - Array136.init - (WArray136.get8 (WArray136.set64 (WArray136.init8 (fun i_0 => (h1).[i_0])) i (VMOVHPD t128))); - t128 <- VEXTRACTI128 t256 (W8.of_int 1); - h2 <- - Array136.init - (WArray136.get8 (WArray136.set64 (WArray136.init8 (fun i_0 => (h2).[i_0])) i (VMOVLPD t128))); - h3 <- - Array136.init - (WArray136.get8 (WArray136.set64 (WArray136.init8 (fun i_0 => (h3).[i_0])) i (VMOVHPD t128))); - i <- i + 1; - } - return (state, h0, h1, h2, h3); - } - - proc _poly_add2 (rp:W16.t Array256.t, bp:W16.t Array256.t) : W16.t Array256.t = { - var aux: int; - - var i:int; - var a:W256.t; - var b:W256.t; - var r:W256.t; - - i <- 0; - while (i < 16) { - a <- - (get256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) (32 * i)); - b <- - (get256_direct (WArray512.init16 (fun i_0 => (bp).[i_0])) (32 * i)); - r <- VPADD_16u16 a b; - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) (32 * i) (r))); - i <- i + 1; - } - return (rp); - } - - proc _poly_csubq (rp:W16.t Array256.t) : W16.t Array256.t = { - var aux: int; - - var qx16:W256.t; - var i:int; - var r:W256.t; - - qx16 <- (get256 (WArray32.init16 (fun i_0 => (jqx16).[i_0])) 0); - i <- 0; - while (i < 16) { - r <- - (get256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) (32 * i)); - r <@ __csubq (r, qx16); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) (32 * i) (r))); - i <- i + 1; - } - return (rp); - } - - proc __w256_interleave_u16 (al:W256.t, ah:W256.t) : W256.t * W256.t = { - - var a0:W256.t; - var a1:W256.t; - - a0 <- VPUNPCKL_16u16 al ah; - a1 <- VPUNPCKH_16u16 al ah; - return (a0, a1); - } - - proc __w256_deinterleave_u16 (_zero:W256.t, a0:W256.t, a1:W256.t) : - W256.t * W256.t = { - - var al:W256.t; - var ah:W256.t; - - al <- VPBLEND_16u16 a0 _zero (W8.of_int 170); - ah <- VPBLEND_16u16 a1 _zero (W8.of_int 170); - al <- VPACKUS_8u32 al ah; - a0 <- VPSRL_8u32 a0 (W8.of_int 16); - a1 <- VPSRL_8u32 a1 (W8.of_int 16); - ah <- VPACKUS_8u32 a0 a1; - return (al, ah); - } - - proc __mont_red (lo:W256.t, hi:W256.t, qx16:W256.t, qinvx16:W256.t) : - W256.t = { - - var m:W256.t; - - m <- VPMULL_16u16 lo qinvx16; - m <- VPMULH_16u16 m qx16; - lo <- VPSUB_16u16 hi m; - return (lo); - } - - proc __wmul_16u16 (x:W256.t, y:W256.t) : W256.t * W256.t = { - - var xy0:W256.t; - var xy1:W256.t; - var xyL:W256.t; - var xyH:W256.t; - - xyL <- VPMULL_16u16 x y; - xyH <- VPMULH_16u16 x y; - (xy0, xy1) <@ __w256_interleave_u16 (xyL, xyH); - return (xy0, xy1); - } - - proc __schoolbook16x (are:W256.t, aim:W256.t, bre:W256.t, bim:W256.t, - zeta_0:W256.t, zetaqinv:W256.t, qx16:W256.t, - qinvx16:W256.t, sign:int) : W256.t * W256.t = { - - var x0:W256.t; - var y0:W256.t; - var zaim:W256.t; - var ac0:W256.t; - var ac1:W256.t; - var ad0:W256.t; - var ad1:W256.t; - var bc0:W256.t; - var bc1:W256.t; - var zbd0:W256.t; - var zbd1:W256.t; - var x1:W256.t; - var y1:W256.t; - var _zero:W256.t; - - zaim <@ __fqmulprecomp16x (aim, zetaqinv, zeta_0, qx16); - (ac0, ac1) <@ __wmul_16u16 (are, bre); - (ad0, ad1) <@ __wmul_16u16 (are, bim); - (bc0, bc1) <@ __wmul_16u16 (aim, bre); - (zbd0, zbd1) <@ __wmul_16u16 (zaim, bim); - if ((sign = 0)) { - x0 <- VPADD_8u32 ac0 zbd0; - x1 <- VPADD_8u32 ac1 zbd1; - } else { - x0 <- VPSUB_8u32 ac0 zbd0; - x1 <- VPSUB_8u32 ac1 zbd1; - } - y0 <- VPADD_8u32 bc0 ad0; - y1 <- VPADD_8u32 bc1 ad1; - _zero <- set0_256 ; - (x0, x1) <@ __w256_deinterleave_u16 (_zero, x0, x1); - (y0, y1) <@ __w256_deinterleave_u16 (_zero, y0, y1); - x0 <@ __mont_red (x0, x1, qx16, qinvx16); - y0 <@ __mont_red (y0, y1, qx16, qinvx16); - return (x0, y0); - } - - proc _poly_basemul (rp:W16.t Array256.t, ap:W16.t Array256.t, - bp:W16.t Array256.t) : W16.t Array256.t = { - - var qx16:W256.t; - var qinvx16:W256.t; - var zetaqinv:W256.t; - var zeta_0:W256.t; - var are:W256.t; - var aim:W256.t; - var bre:W256.t; - var bim:W256.t; - - qx16 <- (get256_direct (WArray32.init16 (fun i => (jqx16).[i])) 0); - qinvx16 <- (get256_direct (WArray32.init16 (fun i => (jqinvx16).[i])) 0); - zetaqinv <- - (get256_direct (WArray800.init16 (fun i => (jzetas_exp).[i])) 272); - zeta_0 <- - (get256_direct (WArray800.init16 (fun i => (jzetas_exp).[i])) 304); - are <- (get256_direct (WArray512.init16 (fun i => (ap).[i])) (32 * 0)); - aim <- (get256_direct (WArray512.init16 (fun i => (ap).[i])) (32 * 1)); - bre <- (get256_direct (WArray512.init16 (fun i => (bp).[i])) (32 * 0)); - bim <- (get256_direct (WArray512.init16 (fun i => (bp).[i])) (32 * 1)); - (are, aim) <@ __schoolbook16x (are, aim, bre, bim, zeta_0, zetaqinv, - qx16, qinvx16, 0); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i => (rp).[i])) (32 * 0) (are))); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i => (rp).[i])) (32 * 1) (aim))); - are <- (get256_direct (WArray512.init16 (fun i => (ap).[i])) (32 * 2)); - aim <- (get256_direct (WArray512.init16 (fun i => (ap).[i])) (32 * 3)); - bre <- (get256_direct (WArray512.init16 (fun i => (bp).[i])) (32 * 2)); - bim <- (get256_direct (WArray512.init16 (fun i => (bp).[i])) (32 * 3)); - (are, aim) <@ __schoolbook16x (are, aim, bre, bim, zeta_0, zetaqinv, - qx16, qinvx16, 1); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i => (rp).[i])) (32 * 2) (are))); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i => (rp).[i])) (32 * 3) (aim))); - zetaqinv <- - (get256_direct (WArray800.init16 (fun i => (jzetas_exp).[i])) 336); - zeta_0 <- - (get256_direct (WArray800.init16 (fun i => (jzetas_exp).[i])) 368); - are <- (get256_direct (WArray512.init16 (fun i => (ap).[i])) (32 * 4)); - aim <- (get256_direct (WArray512.init16 (fun i => (ap).[i])) (32 * 5)); - bre <- (get256_direct (WArray512.init16 (fun i => (bp).[i])) (32 * 4)); - bim <- (get256_direct (WArray512.init16 (fun i => (bp).[i])) (32 * 5)); - (are, aim) <@ __schoolbook16x (are, aim, bre, bim, zeta_0, zetaqinv, - qx16, qinvx16, 0); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i => (rp).[i])) (32 * 4) (are))); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i => (rp).[i])) (32 * 5) (aim))); - are <- (get256_direct (WArray512.init16 (fun i => (ap).[i])) (32 * 6)); - aim <- (get256_direct (WArray512.init16 (fun i => (ap).[i])) (32 * 7)); - bre <- (get256_direct (WArray512.init16 (fun i => (bp).[i])) (32 * 6)); - bim <- (get256_direct (WArray512.init16 (fun i => (bp).[i])) (32 * 7)); - (are, aim) <@ __schoolbook16x (are, aim, bre, bim, zeta_0, zetaqinv, - qx16, qinvx16, 1); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i => (rp).[i])) (32 * 6) (are))); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i => (rp).[i])) (32 * 7) (aim))); - zetaqinv <- - (get256_direct (WArray800.init16 (fun i => (jzetas_exp).[i])) 664); - zeta_0 <- - (get256_direct (WArray800.init16 (fun i => (jzetas_exp).[i])) 696); - are <- (get256_direct (WArray512.init16 (fun i => (ap).[i])) (32 * 8)); - aim <- (get256_direct (WArray512.init16 (fun i => (ap).[i])) (32 * 9)); - bre <- (get256_direct (WArray512.init16 (fun i => (bp).[i])) (32 * 8)); - bim <- (get256_direct (WArray512.init16 (fun i => (bp).[i])) (32 * 9)); - (are, aim) <@ __schoolbook16x (are, aim, bre, bim, zeta_0, zetaqinv, - qx16, qinvx16, 0); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i => (rp).[i])) (32 * 8) (are))); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i => (rp).[i])) (32 * 9) (aim))); - are <- (get256_direct (WArray512.init16 (fun i => (ap).[i])) (32 * 10)); - aim <- (get256_direct (WArray512.init16 (fun i => (ap).[i])) (32 * 11)); - bre <- (get256_direct (WArray512.init16 (fun i => (bp).[i])) (32 * 10)); - bim <- (get256_direct (WArray512.init16 (fun i => (bp).[i])) (32 * 11)); - (are, aim) <@ __schoolbook16x (are, aim, bre, bim, zeta_0, zetaqinv, - qx16, qinvx16, 1); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i => (rp).[i])) (32 * 10) (are))); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i => (rp).[i])) (32 * 11) (aim))); - zetaqinv <- - (get256_direct (WArray800.init16 (fun i => (jzetas_exp).[i])) 728); - zeta_0 <- - (get256_direct (WArray800.init16 (fun i => (jzetas_exp).[i])) 760); - are <- (get256_direct (WArray512.init16 (fun i => (ap).[i])) (32 * 12)); - aim <- (get256_direct (WArray512.init16 (fun i => (ap).[i])) (32 * 13)); - bre <- (get256_direct (WArray512.init16 (fun i => (bp).[i])) (32 * 12)); - bim <- (get256_direct (WArray512.init16 (fun i => (bp).[i])) (32 * 13)); - (are, aim) <@ __schoolbook16x (are, aim, bre, bim, zeta_0, zetaqinv, - qx16, qinvx16, 0); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i => (rp).[i])) (32 * 12) (are))); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i => (rp).[i])) (32 * 13) (aim))); - are <- (get256_direct (WArray512.init16 (fun i => (ap).[i])) (32 * 14)); - aim <- (get256_direct (WArray512.init16 (fun i => (ap).[i])) (32 * 15)); - bre <- (get256_direct (WArray512.init16 (fun i => (bp).[i])) (32 * 14)); - bim <- (get256_direct (WArray512.init16 (fun i => (bp).[i])) (32 * 15)); - (are, aim) <@ __schoolbook16x (are, aim, bre, bim, zeta_0, zetaqinv, - qx16, qinvx16, 1); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i => (rp).[i])) (32 * 14) (are))); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i => (rp).[i])) (32 * 15) (aim))); - return (rp); - } - - proc _poly_compress (rp:W64.t, a:W16.t Array256.t) : W16.t Array256.t = { - var aux: int; - - var x16p:W16.t Array16.t; - var v:W256.t; - var shift1:W256.t; - var mask:W256.t; - var shift2:W256.t; - var permidx:W256.t; - var i:int; - var f0:W256.t; - var f1:W256.t; - var f2:W256.t; - var f3:W256.t; - x16p <- witness; - a <@ _poly_csubq (a); - x16p <- jvx16; - v <- (get256 (WArray32.init16 (fun i_0 => (x16p).[i_0])) 0); - shift1 <- VPBROADCAST_16u16 pc_shift1_s; - mask <- VPBROADCAST_16u16 pc_mask_s; - shift2 <- VPBROADCAST_16u16 pc_shift2_s; - permidx <- - (get256 (WArray32.init32 (fun i_0 => (pc_permidx_s).[i_0])) 0); - aux <- (256 %/ 64); - i <- 0; - while (i < aux) { - f0 <- (get256 (WArray512.init16 (fun i_0 => (a).[i_0])) (4 * i)); - f1 <- (get256 (WArray512.init16 (fun i_0 => (a).[i_0])) ((4 * i) + 1)); - f2 <- (get256 (WArray512.init16 (fun i_0 => (a).[i_0])) ((4 * i) + 2)); - f3 <- (get256 (WArray512.init16 (fun i_0 => (a).[i_0])) ((4 * i) + 3)); - f0 <- VPMULH_16u16 f0 v; - f1 <- VPMULH_16u16 f1 v; - f2 <- VPMULH_16u16 f2 v; - f3 <- VPMULH_16u16 f3 v; - f0 <- VPMULHRS_16u16 f0 shift1; - f1 <- VPMULHRS_16u16 f1 shift1; - f2 <- VPMULHRS_16u16 f2 shift1; - f3 <- VPMULHRS_16u16 f3 shift1; - f0 <- VPAND_256 f0 mask; - f1 <- VPAND_256 f1 mask; - f2 <- VPAND_256 f2 mask; - f3 <- VPAND_256 f3 mask; - f0 <- VPACKUS_16u16 f0 f1; - f2 <- VPACKUS_16u16 f2 f3; - f0 <- VPMADDUBSW_256 f0 shift2; - f2 <- VPMADDUBSW_256 f2 shift2; - f0 <- VPACKUS_16u16 f0 f2; - f0 <- VPERMD permidx f0; - Glob.mem <- - storeW256 Glob.mem (W64.to_uint (rp + (W64.of_int (32 * i)))) (f0); - i <- i + 1; - } - return (a); - } - - proc _poly_compress_1 (rp:W8.t Array128.t, a:W16.t Array256.t) : W8.t Array128.t * - W16.t Array256.t = { - var aux: int; - - var x16p:W16.t Array16.t; - var v:W256.t; - var shift1:W256.t; - var mask:W256.t; - var shift2:W256.t; - var permidx:W256.t; - var i:int; - var f0:W256.t; - var f1:W256.t; - var f2:W256.t; - var f3:W256.t; - x16p <- witness; - a <@ _poly_csubq (a); - x16p <- jvx16; - v <- (get256 (WArray32.init16 (fun i_0 => (x16p).[i_0])) 0); - shift1 <- VPBROADCAST_16u16 pc_shift1_s; - mask <- VPBROADCAST_16u16 pc_mask_s; - shift2 <- VPBROADCAST_16u16 pc_shift2_s; - permidx <- - (get256 (WArray32.init32 (fun i_0 => (pc_permidx_s).[i_0])) 0); - aux <- (256 %/ 64); - i <- 0; - while (i < aux) { - f0 <- (get256 (WArray512.init16 (fun i_0 => (a).[i_0])) (4 * i)); - f1 <- (get256 (WArray512.init16 (fun i_0 => (a).[i_0])) ((4 * i) + 1)); - f2 <- (get256 (WArray512.init16 (fun i_0 => (a).[i_0])) ((4 * i) + 2)); - f3 <- (get256 (WArray512.init16 (fun i_0 => (a).[i_0])) ((4 * i) + 3)); - f0 <- VPMULH_16u16 f0 v; - f1 <- VPMULH_16u16 f1 v; - f2 <- VPMULH_16u16 f2 v; - f3 <- VPMULH_16u16 f3 v; - f0 <- VPMULHRS_16u16 f0 shift1; - f1 <- VPMULHRS_16u16 f1 shift1; - f2 <- VPMULHRS_16u16 f2 shift1; - f3 <- VPMULHRS_16u16 f3 shift1; - f0 <- VPAND_256 f0 mask; - f1 <- VPAND_256 f1 mask; - f2 <- VPAND_256 f2 mask; - f3 <- VPAND_256 f3 mask; - f0 <- VPACKUS_16u16 f0 f1; - f2 <- VPACKUS_16u16 f2 f3; - f0 <- VPMADDUBSW_256 f0 shift2; - f2 <- VPMADDUBSW_256 f2 shift2; - f0 <- VPACKUS_16u16 f0 f2; - f0 <- VPERMD permidx f0; - rp <- - Array128.init - (WArray128.get8 (WArray128.set256_direct (WArray128.init8 (fun i_0 => (rp).[i_0])) (32 * i) (f0))); - i <- i + 1; - } - return (rp, a); - } - - proc _poly_decompress (rp:W16.t Array256.t, ap:W64.t) : W16.t Array256.t = { - var aux: int; - - var x16p:W16.t Array16.t; - var q:W256.t; - var x32p:W8.t Array32.t; - var shufbidx:W256.t; - var mask:W256.t; - var shift:W256.t; - var f:W256.t; - var i:int; - x16p <- witness; - x32p <- witness; - x16p <- jqx16; - q <- (get256 (WArray32.init16 (fun i_0 => (x16p).[i_0])) 0); - x32p <- pd_jshufbidx; - shufbidx <- (get256 (WArray32.init8 (fun i_0 => (x32p).[i_0])) 0); - mask <- VPBROADCAST_8u32 pd_mask_s; - shift <- VPBROADCAST_8u32 pd_shift_s; - f <- set0_256 ; - aux <- (256 %/ 16); - i <- 0; - while (i < aux) { - f <- - VPBROADCAST_2u128 (loadW128 Glob.mem (W64.to_uint (ap + (W64.of_int (8 * i))))); - f <- VPSHUFB_256 f shufbidx; - f <- VPAND_256 f mask; - f <- VPMULL_16u16 f shift; - f <- VPMULHRS_16u16 f q; - rp <- - Array256.init - (WArray512.get16 (WArray512.set256 (WArray512.init16 (fun i_0 => (rp).[i_0])) i (f))); - i <- i + 1; - } - return (rp); - } - - proc _poly_frombytes (rp:W16.t Array256.t, ap:W64.t) : W16.t Array256.t = { - var aux: int; - - var maskp:W16.t Array16.t; - var mask:W256.t; - var i:int; - var t0:W256.t; - var t1:W256.t; - var t2:W256.t; - var t3:W256.t; - var t4:W256.t; - var t5:W256.t; - var tt:W256.t; - var t6:W256.t; - var t7:W256.t; - var t8:W256.t; - var t9:W256.t; - var t10:W256.t; - var t11:W256.t; - maskp <- witness; - maskp <- maskx16; - mask <- (get256 (WArray32.init16 (fun i_0 => (maskp).[i_0])) 0); - i <- 0; - while (i < 2) { - t0 <- (loadW256 Glob.mem (W64.to_uint (ap + (W64.of_int (192 * i))))); - t1 <- - (loadW256 Glob.mem (W64.to_uint (ap + (W64.of_int ((192 * i) + 32))))); - t2 <- - (loadW256 Glob.mem (W64.to_uint (ap + (W64.of_int ((192 * i) + 64))))); - t3 <- - (loadW256 Glob.mem (W64.to_uint (ap + (W64.of_int ((192 * i) + 96))))); - t4 <- - (loadW256 Glob.mem (W64.to_uint (ap + (W64.of_int ((192 * i) + 128))))); - t5 <- - (loadW256 Glob.mem (W64.to_uint (ap + (W64.of_int ((192 * i) + 160))))); - (tt, t3) <@ __shuffle8 (t0, t3); - (t0, t4) <@ __shuffle8 (t1, t4); - (t1, t5) <@ __shuffle8 (t2, t5); - (t2, t4) <@ __shuffle4 (tt, t4); - (tt, t1) <@ __shuffle4 (t3, t1); - (t3, t5) <@ __shuffle4 (t0, t5); - (t0, t1) <@ __shuffle2 (t2, t1); - (t2, t3) <@ __shuffle2 (t4, t3); - (t4, t5) <@ __shuffle2 (tt, t5); - (t6, t3) <@ __shuffle1 (t0, t3); - (t0, t4) <@ __shuffle1 (t1, t4); - (t1, t5) <@ __shuffle1 (t2, t5); - t7 <- VPSRL_16u16 t6 (W8.of_int 12); - t8 <- VPSLL_16u16 t3 (W8.of_int 4); - t7 <- VPOR_256 t7 t8; - t6 <- VPAND_256 mask t6; - t7 <- VPAND_256 mask t7; - t8 <- VPSRL_16u16 t3 (W8.of_int 8); - t9 <- VPSLL_16u16 t0 (W8.of_int 8); - t8 <- VPOR_256 t8 t9; - t8 <- VPAND_256 mask t8; - t9 <- VPSRL_16u16 t0 (W8.of_int 4); - t9 <- VPAND_256 mask t9; - t10 <- VPSRL_16u16 t4 (W8.of_int 12); - t11 <- VPSLL_16u16 t1 (W8.of_int 4); - t10 <- VPOR_256 t10 t11; - t4 <- VPAND_256 mask t4; - t10 <- VPAND_256 mask t10; - t11 <- VPSRL_16u16 t1 (W8.of_int 8); - tt <- VPSLL_16u16 t5 (W8.of_int 8); - t11 <- VPOR_256 t11 tt; - t11 <- VPAND_256 mask t11; - tt <- VPSRL_16u16 t5 (W8.of_int 4); - tt <- VPAND_256 mask tt; - rp <- - Array256.init - (WArray512.get16 (WArray512.set256 (WArray512.init16 (fun i_0 => (rp).[i_0])) (8 * i) (t6))); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256 (WArray512.init16 (fun i_0 => (rp).[i_0])) ((8 * i) + 1) (t7))); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256 (WArray512.init16 (fun i_0 => (rp).[i_0])) ((8 * i) + 2) (t8))); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256 (WArray512.init16 (fun i_0 => (rp).[i_0])) ((8 * i) + 3) (t9))); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256 (WArray512.init16 (fun i_0 => (rp).[i_0])) ((8 * i) + 4) (t4))); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256 (WArray512.init16 (fun i_0 => (rp).[i_0])) ((8 * i) + 5) (t10))); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256 (WArray512.init16 (fun i_0 => (rp).[i_0])) ((8 * i) + 6) (t11))); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256 (WArray512.init16 (fun i_0 => (rp).[i_0])) ((8 * i) + 7) (tt))); - i <- i + 1; - } - return (rp); - } - - proc _poly_frommont (rp:W16.t Array256.t) : W16.t Array256.t = { - var aux: int; - - var x16p:W16.t Array16.t; - var qx16:W256.t; - var qinvx16:W256.t; - var dmontx16:W256.t; - var i:int; - var t:W256.t; - x16p <- witness; - x16p <- jqx16; - qx16 <- (get256 (WArray32.init16 (fun i_0 => (x16p).[i_0])) 0); - x16p <- jqinvx16; - qinvx16 <- (get256 (WArray32.init16 (fun i_0 => (x16p).[i_0])) 0); - x16p <- jdmontx16; - dmontx16 <- (get256 (WArray32.init16 (fun i_0 => (x16p).[i_0])) 0); - aux <- (256 %/ 16); - i <- 0; - while (i < aux) { - t <- (get256 (WArray512.init16 (fun i_0 => (rp).[i_0])) i); - t <@ __fqmulx16 (t, dmontx16, qx16, qinvx16); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256 (WArray512.init16 (fun i_0 => (rp).[i_0])) i (t))); - i <- i + 1; - } - return (rp); - } - - proc _poly_frommsg_1 (rp:W16.t Array256.t, ap:W8.t Array32.t) : W16.t Array256.t = { - var aux: int; - - var x16p:W16.t Array16.t; - var hqs:W256.t; - var shift:W256.t; - var idx:W256.t; - var f:W256.t; - var i:int; - var g3:W256.t; - var g0:W256.t; - var g1:W256.t; - var g2:W256.t; - var h0:W256.t; - var h2:W256.t; - var h1:W256.t; - var h3:W256.t; - x16p <- witness; - x16p <- hqx16_p1; - hqs <- (get256 (WArray32.init16 (fun i_0 => (x16p).[i_0])) 0); - shift <- - VPBROADCAST_2u128 (get128 - (WArray16.init32 (fun i_0 => (pfm_shift_s).[i_0])) 0); - idx <- - VPBROADCAST_2u128 (get128 (WArray16.init8 (fun i_0 => (pfm_idx_s).[i_0])) - 0); - f <- (get256 (WArray32.init8 (fun i_0 => (ap).[i_0])) 0); - i <- 0; - while (i < 4) { - g3 <- VPSHUFD_256 f (W8.of_int (85 * i)); - g3 <- VPSLLV_8u32 g3 shift; - g3 <- VPSHUFB_256 g3 idx; - g0 <- VPSLL_16u16 g3 (W8.of_int 12); - g1 <- VPSLL_16u16 g3 (W8.of_int 8); - g2 <- VPSLL_16u16 g3 (W8.of_int 4); - g0 <- VPSRA_16u16 g0 (W8.of_int 15); - g1 <- VPSRA_16u16 g1 (W8.of_int 15); - g2 <- VPSRA_16u16 g2 (W8.of_int 15); - g3 <- VPSRA_16u16 g3 (W8.of_int 15); - g0 <- VPAND_256 g0 hqs; - g1 <- VPAND_256 g1 hqs; - g2 <- VPAND_256 g2 hqs; - g3 <- VPAND_256 g3 hqs; - h0 <- VPUNPCKL_4u64 g0 g1; - h2 <- VPUNPCKH_4u64 g0 g1; - h1 <- VPUNPCKL_4u64 g2 g3; - h3 <- VPUNPCKH_4u64 g2 g3; - g0 <- VPERM2I128 h0 h1 (W8.of_int 32); - g2 <- VPERM2I128 h0 h1 (W8.of_int 49); - g1 <- VPERM2I128 h2 h3 (W8.of_int 32); - g3 <- VPERM2I128 h2 h3 (W8.of_int 49); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256 (WArray512.init16 (fun i_0 => (rp).[i_0])) (2 * i) (g0))); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256 (WArray512.init16 (fun i_0 => (rp).[i_0])) ((2 * i) + 1) (g1))); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256 (WArray512.init16 (fun i_0 => (rp).[i_0])) ((2 * i) + 8) (g2))); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256 (WArray512.init16 (fun i_0 => (rp).[i_0])) (((2 * i) + 8) + 1) (g3))); - i <- i + 1; - } - return (rp); - } - - proc __cbd3 (rp:W16.t Array256.t, buf:W8.t Array128.t) : W16.t Array256.t = { - var aux: int; - - var mask249_s:W32.t; - var mask6DB_s:W32.t; - var mask07_s:W32.t; - var mask70_s:W32.t; - var mask3_s:W16.t; - var mask249:W256.t; - var mask6DB:W256.t; - var mask07:W256.t; - var mask70:W256.t; - var mask3:W256.t; - var shufbidx:W256.t; - var i:int; - var f0:W256.t; - var f1:W256.t; - var f2:W256.t; - var f3:W256.t; - - mask249_s <- (W32.of_int 2396745); - mask6DB_s <- (W32.of_int 7190235); - mask07_s <- (W32.of_int 7); - mask70_s <- (W32.of_int (7 `<<` 16)); - mask3_s <- (W16.of_int 3); - mask249 <- VPBROADCAST_8u32 mask249_s; - mask6DB <- VPBROADCAST_8u32 mask6DB_s; - mask07 <- VPBROADCAST_8u32 mask07_s; - mask70 <- VPBROADCAST_8u32 mask70_s; - mask3 <- VPBROADCAST_16u16 mask3_s; - shufbidx <- - (get256 (WArray32.init8 (fun i_0 => (cbd_jshufbidx).[i_0])) 0); - aux <- (256 %/ 32); - i <- 0; - while (i < aux) { - f0 <- - (get256_direct (WArray128.init8 (fun i_0 => (buf).[i_0])) (24 * i)); - f0 <- VPERMQ f0 (W8.of_int 148); - f0 <- VPSHUFB_256 f0 shufbidx; - f1 <- VPSRL_8u32 f0 (W8.of_int 1); - f2 <- VPSRL_8u32 f0 (W8.of_int 2); - f0 <- VPAND_256 mask249 f0; - f1 <- VPAND_256 mask249 f1; - f2 <- VPAND_256 mask249 f2; - f0 <- VPADD_8u32 f0 f1; - f0 <- VPADD_8u32 f0 f2; - f1 <- VPSRL_8u32 f0 (W8.of_int 3); - f0 <- VPADD_8u32 f0 mask6DB; - f0 <- VPSUB_8u32 f0 f1; - f1 <- VPSLL_8u32 f0 (W8.of_int 10); - f2 <- VPSRL_8u32 f0 (W8.of_int 12); - f3 <- VPSRL_8u32 f0 (W8.of_int 2); - f0 <- VPAND_256 f0 mask07; - f1 <- VPAND_256 f1 mask70; - f2 <- VPAND_256 f2 mask07; - f3 <- VPAND_256 f3 mask70; - f0 <- VPADD_16u16 f0 f1; - f1 <- VPADD_16u16 f2 f3; - f0 <- VPSUB_16u16 f0 mask3; - f1 <- VPSUB_16u16 f1 mask3; - f2 <- VPUNPCKL_8u32 f0 f1; - f3 <- VPUNPCKH_8u32 f0 f1; - f0 <- VPERM2I128 f2 f3 (W8.of_int 32); - f1 <- VPERM2I128 f2 f3 (W8.of_int 49); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256 (WArray512.init16 (fun i_0 => (rp).[i_0])) (2 * i) (f0))); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256 (WArray512.init16 (fun i_0 => (rp).[i_0])) ((2 * i) + 1) (f1))); - i <- i + 1; - } - return (rp); - } - - proc __cbd2 (rp:W16.t Array256.t, buf:W8.t Array128.t) : W16.t Array256.t = { - var aux: int; - - var mask55_s:W32.t; - var mask33_s:W32.t; - var mask03_s:W32.t; - var mask0F_s:W32.t; - var mask55:W256.t; - var mask33:W256.t; - var mask03:W256.t; - var mask0F:W256.t; - var i:int; - var f0:W256.t; - var f1:W256.t; - var f2:W256.t; - var f3:W256.t; - var t:W128.t; - - mask55_s <- (W32.of_int 1431655765); - mask33_s <- (W32.of_int 858993459); - mask03_s <- (W32.of_int 50529027); - mask0F_s <- (W32.of_int 252645135); - mask55 <- VPBROADCAST_8u32 mask55_s; - mask33 <- VPBROADCAST_8u32 mask33_s; - mask03 <- VPBROADCAST_8u32 mask03_s; - mask0F <- VPBROADCAST_8u32 mask0F_s; - aux <- (256 %/ 64); - i <- 0; - while (i < aux) { - f0 <- (get256 (WArray128.init8 (fun i_0 => (buf).[i_0])) i); - f1 <- VPSRL_16u16 f0 (W8.of_int 1); - f0 <- VPAND_256 mask55 f0; - f1 <- VPAND_256 mask55 f1; - f0 <- VPADD_32u8 f0 f1; - f1 <- VPSRL_16u16 f0 (W8.of_int 2); - f0 <- VPAND_256 mask33 f0; - f1 <- VPAND_256 mask33 f1; - f0 <- VPADD_32u8 f0 mask33; - f0 <- VPSUB_32u8 f0 f1; - f1 <- VPSRL_16u16 f0 (W8.of_int 4); - f0 <- VPAND_256 mask0F f0; - f1 <- VPAND_256 mask0F f1; - f0 <- VPSUB_32u8 f0 mask03; - f1 <- VPSUB_32u8 f1 mask03; - f2 <- VPUNPCKL_32u8 f0 f1; - f3 <- VPUNPCKH_32u8 f0 f1; - t <- (truncateu128 f2); - f0 <- VPMOVSX_16u8_16u16 t; - t <- VEXTRACTI128 f2 (W8.of_int 1); - f1 <- VPMOVSX_16u8_16u16 t; - t <- (truncateu128 f3); - f2 <- VPMOVSX_16u8_16u16 t; - t <- VEXTRACTI128 f3 (W8.of_int 1); - f3 <- VPMOVSX_16u8_16u16 t; - rp <- - Array256.init - (WArray512.get16 (WArray512.set256 (WArray512.init16 (fun i_0 => (rp).[i_0])) (4 * i) (f0))); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256 (WArray512.init16 (fun i_0 => (rp).[i_0])) ((4 * i) + 1) (f2))); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256 (WArray512.init16 (fun i_0 => (rp).[i_0])) ((4 * i) + 2) (f1))); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256 (WArray512.init16 (fun i_0 => (rp).[i_0])) ((4 * i) + 3) (f3))); - i <- i + 1; - } - return (rp); - } - - proc __poly_cbd_eta1 (rp:W16.t Array256.t, buf:W8.t Array128.t) : W16.t Array256.t = { - - - - if ((2 = 2)) { - rp <@ __cbd2 (rp, (Array128.init (fun i => buf.[0 + i]))); - } else { - rp <@ __cbd3 (rp, buf); - } - return (rp); - } - - proc __shake256_squeezenblocks4x (state:W256.t Array25.t, - buf0:W8.t Array136.t, - buf1:W8.t Array136.t, - buf2:W8.t Array136.t, - buf3:W8.t Array136.t) : W256.t Array25.t * - W8.t Array136.t * - W8.t Array136.t * - W8.t Array136.t * - W8.t Array136.t = { - var aux: int; - var aux_4: W8.t Array136.t; - var aux_3: W8.t Array136.t; - var aux_2: W8.t Array136.t; - var aux_1: W8.t Array136.t; - var aux_0: W256.t Array25.t; - - var i:int; - - aux <- (((((2 * 256) %/ 4) + 136) - 1) %/ 136); - i <- 0; - while (i < aux) { - (aux_0, aux_4, aux_3, aux_2, - aux_1) <@ __shake256_squeezeblock4x (state, - (Array136.init (fun i_0 => buf0.[(i * 136) + i_0])), - (Array136.init (fun i_0 => buf1.[(i * 136) + i_0])), - (Array136.init (fun i_0 => buf2.[(i * 136) + i_0])), - (Array136.init (fun i_0 => buf3.[(i * 136) + i_0]))); - state <- aux_0; - buf0 <- Array136.init - (fun i_0 => if (i * 136) <= i_0 < (i * 136) + 136 - then aux_4.[i_0-(i * 136)] else buf0.[i_0]); - buf1 <- Array136.init - (fun i_0 => if (i * 136) <= i_0 < (i * 136) + 136 - then aux_3.[i_0-(i * 136)] else buf1.[i_0]); - buf2 <- Array136.init - (fun i_0 => if (i * 136) <= i_0 < (i * 136) + 136 - then aux_2.[i_0-(i * 136)] else buf2.[i_0]); - buf3 <- Array136.init - (fun i_0 => if (i * 136) <= i_0 < (i * 136) + 136 - then aux_1.[i_0-(i * 136)] else buf3.[i_0]); - i <- i + 1; - } - return (state, buf0, buf1, buf2, buf3); - } - - proc _poly_getnoise_eta1_4x (r0:W16.t Array256.t, r1:W16.t Array256.t, - r2:W16.t Array256.t, r3:W16.t Array256.t, - seed:W8.t Array32.t, nonce:W8.t) : W16.t Array256.t * - W16.t Array256.t * - W16.t Array256.t * - W16.t Array256.t = { - - var f:W256.t; - var buf0:W8.t Array136.t; - var buf1:W8.t Array136.t; - var buf2:W8.t Array136.t; - var buf3:W8.t Array136.t; - var state:W256.t Array25.t; - buf0 <- witness; - buf1 <- witness; - buf2 <- witness; - buf3 <- witness; - state <- witness; - f <- (get256 (WArray32.init8 (fun i => (seed).[i])) 0); - buf0 <- - Array136.init - (WArray136.get8 (WArray136.set256 (WArray136.init8 (fun i => (buf0).[i])) 0 (f))); - buf1 <- - Array136.init - (WArray136.get8 (WArray136.set256 (WArray136.init8 (fun i => (buf1).[i])) 0 (f))); - buf2 <- - Array136.init - (WArray136.get8 (WArray136.set256 (WArray136.init8 (fun i => (buf2).[i])) 0 (f))); - buf3 <- - Array136.init - (WArray136.get8 (WArray136.set256 (WArray136.init8 (fun i => (buf3).[i])) 0 (f))); - buf0 <- - Array136.init - (WArray136.get8 (WArray136.set8_direct (WArray136.init8 (fun i => (buf0).[i])) 32 (nonce))); - nonce <- (nonce + (W8.of_int 1)); - buf1 <- - Array136.init - (WArray136.get8 (WArray136.set8_direct (WArray136.init8 (fun i => (buf1).[i])) 32 (nonce))); - nonce <- (nonce + (W8.of_int 1)); - buf2 <- - Array136.init - (WArray136.get8 (WArray136.set8_direct (WArray136.init8 (fun i => (buf2).[i])) 32 (nonce))); - nonce <- (nonce + (W8.of_int 1)); - buf3 <- - Array136.init - (WArray136.get8 (WArray136.set8_direct (WArray136.init8 (fun i => (buf3).[i])) 32 (nonce))); - state <@ _shake256_absorb4x_33 (state, - (Array33.init (fun i => buf0.[0 + i])), - (Array33.init (fun i => buf1.[0 + i])), - (Array33.init (fun i => buf2.[0 + i])), - (Array33.init (fun i => buf3.[0 + i]))); - (state, buf0, buf1, buf2, buf3) <@ __shake256_squeezenblocks4x (state, - buf0, buf1, buf2, buf3); - r0 <@ __poly_cbd_eta1 (r0, (Array128.init (fun i => buf0.[0 + i]))); - r1 <@ __poly_cbd_eta1 (r1, (Array128.init (fun i => buf1.[0 + i]))); - r2 <@ __poly_cbd_eta1 (r2, (Array128.init (fun i => buf2.[0 + i]))); - r3 <@ __poly_cbd_eta1 (r3, (Array128.init (fun i => buf3.[0 + i]))); - return (r0, r1, r2, r3); - } - - proc __invntt___butterfly64x (rl0:W256.t, rl1:W256.t, rl2:W256.t, - rl3:W256.t, rh0:W256.t, rh1:W256.t, - rh2:W256.t, rh3:W256.t, zl0:W256.t, - zl1:W256.t, zh0:W256.t, zh1:W256.t, - qx16:W256.t) : W256.t * W256.t * W256.t * - W256.t * W256.t * W256.t * - W256.t * W256.t = { - - var t0:W256.t; - var t1:W256.t; - var t2:W256.t; - var t3:W256.t; - - t0 <- VPSUB_16u16 rl0 rh0; - t1 <- VPSUB_16u16 rl1 rh1; - t2 <- VPSUB_16u16 rl2 rh2; - rl0 <- VPADD_16u16 rh0 rl0; - rl1 <- VPADD_16u16 rh1 rl1; - rh0 <- VPMULL_16u16 zl0 t0; - rl2 <- VPADD_16u16 rh2 rl2; - rh1 <- VPMULL_16u16 zl0 t1; - t3 <- VPSUB_16u16 rl3 rh3; - rl3 <- VPADD_16u16 rh3 rl3; - rh2 <- VPMULL_16u16 zl1 t2; - rh3 <- VPMULL_16u16 zl1 t3; - t0 <- VPMULH_16u16 zh0 t0; - t1 <- VPMULH_16u16 zh0 t1; - t2 <- VPMULH_16u16 zh1 t2; - t3 <- VPMULH_16u16 zh1 t3; - rh0 <- VPMULH_16u16 qx16 rh0; - rh1 <- VPMULH_16u16 qx16 rh1; - rh2 <- VPMULH_16u16 qx16 rh2; - rh3 <- VPMULH_16u16 qx16 rh3; - rh0 <- VPSUB_16u16 t0 rh0; - rh1 <- VPSUB_16u16 t1 rh1; - rh2 <- VPSUB_16u16 t2 rh2; - rh3 <- VPSUB_16u16 t3 rh3; - return (rl0, rl1, rl2, rl3, rh0, rh1, rh2, rh3); - } - - proc _poly_invntt (rp:W16.t Array256.t) : W16.t Array256.t = { - var aux: int; - - var zetasp:W16.t Array400.t; - var qx16:W256.t; - var i:int; - var zeta0:W256.t; - var zeta1:W256.t; - var zeta2:W256.t; - var zeta3:W256.t; - var r0:W256.t; - var r1:W256.t; - var r2:W256.t; - var r3:W256.t; - var r4:W256.t; - var r5:W256.t; - var r6:W256.t; - var r7:W256.t; - var vx16:W256.t; - var flox16:W256.t; - var fhix16:W256.t; - zetasp <- witness; - zetasp <- jzetas_inv_exp; - qx16 <- (get256 (WArray32.init16 (fun i_0 => (jqx16).[i_0])) 0); - i <- 0; - while (i < 2) { - zeta0 <- - (get256_direct (WArray800.init16 (fun i_0 => (zetasp).[i_0])) - (0 + (392 * i))); - zeta1 <- - (get256_direct (WArray800.init16 (fun i_0 => (zetasp).[i_0])) - (64 + (392 * i))); - zeta2 <- - (get256_direct (WArray800.init16 (fun i_0 => (zetasp).[i_0])) - (32 + (392 * i))); - zeta3 <- - (get256_direct (WArray800.init16 (fun i_0 => (zetasp).[i_0])) - (96 + (392 * i))); - r0 <- - (get256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) - ((32 * 0) + (256 * i))); - r1 <- - (get256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) - ((32 * 1) + (256 * i))); - r2 <- - (get256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) - ((32 * 2) + (256 * i))); - r3 <- - (get256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) - ((32 * 3) + (256 * i))); - r4 <- - (get256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) - ((32 * 4) + (256 * i))); - r5 <- - (get256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) - ((32 * 5) + (256 * i))); - r6 <- - (get256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) - ((32 * 6) + (256 * i))); - r7 <- - (get256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) - ((32 * 7) + (256 * i))); - (r0, r1, r4, r5, r2, r3, r6, r7) <@ __invntt___butterfly64x (r0, r1, - r4, r5, r2, r3, r6, r7, zeta0, zeta1, zeta2, zeta3, qx16); - vx16 <- (get256 (WArray32.init16 (fun i_0 => (jvx16).[i_0])) 0); - zeta0 <- - (get256_direct (WArray800.init16 (fun i_0 => (zetasp).[i_0])) - (128 + (392 * i))); - zeta1 <- - (get256_direct (WArray800.init16 (fun i_0 => (zetasp).[i_0])) - (160 + (392 * i))); - r0 <@ __red16x (r0, qx16, vx16); - r1 <@ __red16x (r1, qx16, vx16); - r4 <@ __red16x (r4, qx16, vx16); - r5 <@ __red16x (r5, qx16, vx16); - (r0, r1, r2, r3, r4, r5, r6, r7) <@ __invntt___butterfly64x (r0, r1, - r2, r3, r4, r5, r6, r7, zeta0, zeta0, zeta1, zeta1, qx16); - (r0, r1) <@ __shuffle1 (r0, r1); - (r2, r3) <@ __shuffle1 (r2, r3); - (r4, r5) <@ __shuffle1 (r4, r5); - (r6, r7) <@ __shuffle1 (r6, r7); - zeta0 <- - (get256_direct (WArray800.init16 (fun i_0 => (zetasp).[i_0])) - (192 + (392 * i))); - zeta1 <- - (get256_direct (WArray800.init16 (fun i_0 => (zetasp).[i_0])) - (224 + (392 * i))); - (r0, r2, r4, r6, r1, r3, r5, r7) <@ __invntt___butterfly64x (r0, r2, - r4, r6, r1, r3, r5, r7, zeta0, zeta0, zeta1, zeta1, qx16); - r0 <@ __red16x (r0, qx16, vx16); - (r0, r2) <@ __shuffle2 (r0, r2); - (r4, r6) <@ __shuffle2 (r4, r6); - (r1, r3) <@ __shuffle2 (r1, r3); - (r5, r7) <@ __shuffle2 (r5, r7); - zeta0 <- - (get256_direct (WArray800.init16 (fun i_0 => (zetasp).[i_0])) - (256 + (392 * i))); - zeta1 <- - (get256_direct (WArray800.init16 (fun i_0 => (zetasp).[i_0])) - (288 + (392 * i))); - (r0, r4, r1, r5, r2, r6, r3, r7) <@ __invntt___butterfly64x (r0, r4, - r1, r5, r2, r6, r3, r7, zeta0, zeta0, zeta1, zeta1, qx16); - r0 <@ __red16x (r0, qx16, vx16); - (r0, r4) <@ __shuffle4 (r0, r4); - (r1, r5) <@ __shuffle4 (r1, r5); - (r2, r6) <@ __shuffle4 (r2, r6); - (r3, r7) <@ __shuffle4 (r3, r7); - zeta0 <- - (get256_direct (WArray800.init16 (fun i_0 => (zetasp).[i_0])) - (320 + (392 * i))); - zeta1 <- - (get256_direct (WArray800.init16 (fun i_0 => (zetasp).[i_0])) - (352 + (392 * i))); - (r0, r1, r2, r3, r4, r5, r6, r7) <@ __invntt___butterfly64x (r0, r1, - r2, r3, r4, r5, r6, r7, zeta0, zeta0, zeta1, zeta1, qx16); - r0 <@ __red16x (r0, qx16, vx16); - (r0, r1) <@ __shuffle8 (r0, r1); - (r2, r3) <@ __shuffle8 (r2, r3); - (r4, r5) <@ __shuffle8 (r4, r5); - (r6, r7) <@ __shuffle8 (r6, r7); - zeta0 <- - VPBROADCAST_8u32 (get32_direct - (WArray800.init16 (fun i_0 => (zetasp).[i_0])) - (384 + (392 * i))); - zeta1 <- - VPBROADCAST_8u32 (get32_direct - (WArray800.init16 (fun i_0 => (zetasp).[i_0])) - (388 + (392 * i))); - (r0, r2, r4, r6, r1, r3, r5, r7) <@ __invntt___butterfly64x (r0, r2, - r4, r6, r1, r3, r5, r7, zeta0, zeta0, zeta1, zeta1, qx16); - r0 <@ __red16x (r0, qx16, vx16); - if ((i = 0)) { - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) ((32 * 0) + (256 * i)) (r0))); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) ((32 * 1) + (256 * i)) (r2))); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) ((32 * 2) + (256 * i)) (r4))); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) ((32 * 3) + (256 * i)) (r6))); - } else { - - } - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) ((32 * 4) + (256 * i)) (r1))); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) ((32 * 5) + (256 * i)) (r3))); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) ((32 * 6) + (256 * i)) (r5))); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) ((32 * 7) + (256 * i)) (r7))); - i <- i + 1; - } - zeta0 <- - VPBROADCAST_8u32 (get32_direct - (WArray800.init16 (fun i_0 => (zetasp).[i_0])) 784); - zeta1 <- - VPBROADCAST_8u32 (get32_direct - (WArray800.init16 (fun i_0 => (zetasp).[i_0])) 788); - i <- 0; - while (i < 2) { - if ((i = 0)) { - r7 <- r6; - r6 <- r4; - r5 <- r2; - r4 <- r0; - } else { - r4 <- - (get256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) - ((32 * 8) + (128 * i))); - r5 <- - (get256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) - ((32 * 9) + (128 * i))); - r6 <- - (get256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) - ((32 * 10) + (128 * i))); - r7 <- - (get256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) - ((32 * 11) + (128 * i))); - } - r0 <- - (get256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) - ((32 * 0) + (128 * i))); - r1 <- - (get256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) - ((32 * 1) + (128 * i))); - r2 <- - (get256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) - ((32 * 2) + (128 * i))); - r3 <- - (get256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) - ((32 * 3) + (128 * i))); - (r0, r1, r2, r3, r4, r5, r6, r7) <@ __invntt___butterfly64x (r0, r1, - r2, r3, r4, r5, r6, r7, zeta0, zeta0, zeta1, zeta1, qx16); - flox16 <- (get256 (WArray32.init16 (fun i_0 => (jflox16).[i_0])) 0); - fhix16 <- (get256 (WArray32.init16 (fun i_0 => (jfhix16).[i_0])) 0); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) ((32 * 8) + (128 * i)) (r4))); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) ((32 * 9) + (128 * i)) (r5))); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) ((32 * 10) + (128 * i)) (r6))); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) ((32 * 11) + (128 * i)) (r7))); - r0 <@ __fqmulprecomp16x (r0, flox16, fhix16, qx16); - r1 <@ __fqmulprecomp16x (r1, flox16, fhix16, qx16); - r2 <@ __fqmulprecomp16x (r2, flox16, fhix16, qx16); - r3 <@ __fqmulprecomp16x (r3, flox16, fhix16, qx16); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) ((32 * 0) + (128 * i)) (r0))); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) ((32 * 1) + (128 * i)) (r1))); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) ((32 * 2) + (128 * i)) (r2))); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) ((32 * 3) + (128 * i)) (r3))); - i <- i + 1; - } - return (rp); - } - - proc __butterfly64x (rl0:W256.t, rl1:W256.t, rl2:W256.t, rl3:W256.t, - rh0:W256.t, rh1:W256.t, rh2:W256.t, rh3:W256.t, - zl0:W256.t, zl1:W256.t, zh0:W256.t, zh1:W256.t, - qx16:W256.t) : W256.t * W256.t * W256.t * W256.t * - W256.t * W256.t * W256.t * W256.t = { - - var t0:W256.t; - var t1:W256.t; - var t2:W256.t; - var t3:W256.t; - var t4:W256.t; - var t5:W256.t; - var t6:W256.t; - var t7:W256.t; - - t0 <- VPMULL_16u16 zl0 rh0; - t1 <- VPMULH_16u16 zh0 rh0; - t2 <- VPMULL_16u16 zl0 rh1; - t3 <- VPMULH_16u16 zh0 rh1; - t4 <- VPMULL_16u16 zl1 rh2; - t5 <- VPMULH_16u16 zh1 rh2; - t6 <- VPMULL_16u16 zl1 rh3; - t7 <- VPMULH_16u16 zh1 rh3; - t0 <- VPMULH_16u16 t0 qx16; - t2 <- VPMULH_16u16 t2 qx16; - t4 <- VPMULH_16u16 t4 qx16; - t6 <- VPMULH_16u16 t6 qx16; - rh1 <- VPSUB_16u16 rl1 t3; - rl1 <- VPADD_16u16 t3 rl1; - rh0 <- VPSUB_16u16 rl0 t1; - rl0 <- VPADD_16u16 t1 rl0; - rh3 <- VPSUB_16u16 rl3 t7; - rl3 <- VPADD_16u16 t7 rl3; - rh2 <- VPSUB_16u16 rl2 t5; - rl2 <- VPADD_16u16 t5 rl2; - rh0 <- VPADD_16u16 t0 rh0; - rl0 <- VPSUB_16u16 rl0 t0; - rh1 <- VPADD_16u16 t2 rh1; - rl1 <- VPSUB_16u16 rl1 t2; - rh2 <- VPADD_16u16 t4 rh2; - rl2 <- VPSUB_16u16 rl2 t4; - rh3 <- VPADD_16u16 t6 rh3; - rl3 <- VPSUB_16u16 rl3 t6; - return (rl0, rl1, rl2, rl3, rh0, rh1, rh2, rh3); - } - - proc _poly_ntt (rp:W16.t Array256.t) : W16.t Array256.t = { - var aux: int; - - var zetasp:W16.t Array400.t; - var qx16:W256.t; - var zeta0:W256.t; - var zeta1:W256.t; - var r0:W256.t; - var r1:W256.t; - var r2:W256.t; - var r3:W256.t; - var r4:W256.t; - var r5:W256.t; - var r6:W256.t; - var r7:W256.t; - var i:int; - var zeta2:W256.t; - var zeta3:W256.t; - var vx16:W256.t; - zetasp <- witness; - zetasp <- jzetas_exp; - qx16 <- (get256 (WArray32.init16 (fun i_0 => (jqx16).[i_0])) 0); - zeta0 <- - VPBROADCAST_8u32 (get32 (WArray800.init16 (fun i_0 => (zetasp).[i_0])) 0); - zeta1 <- - VPBROADCAST_8u32 (get32 (WArray800.init16 (fun i_0 => (zetasp).[i_0])) 1); - r0 <- - (get256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) (32 * 0)); - r1 <- - (get256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) (32 * 1)); - r2 <- - (get256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) (32 * 2)); - r3 <- - (get256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) (32 * 3)); - r4 <- - (get256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) (32 * 8)); - r5 <- - (get256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) (32 * 9)); - r6 <- - (get256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) (32 * 10)); - r7 <- - (get256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) (32 * 11)); - (r0, r1, r2, r3, r4, r5, r6, r7) <@ __butterfly64x (r0, r1, r2, r3, r4, - r5, r6, r7, zeta0, zeta0, zeta1, zeta1, qx16); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) (32 * 0) (r0))); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) (32 * 1) (r1))); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) (32 * 2) (r2))); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) (32 * 3) (r3))); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) (32 * 8) (r4))); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) (32 * 9) (r5))); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) (32 * 10) (r6))); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) (32 * 11) (r7))); - r0 <- - (get256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) (32 * 4)); - r1 <- - (get256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) (32 * 5)); - r2 <- - (get256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) (32 * 6)); - r3 <- - (get256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) (32 * 7)); - r4 <- - (get256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) (32 * 12)); - r5 <- - (get256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) (32 * 13)); - r6 <- - (get256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) (32 * 14)); - r7 <- - (get256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) (32 * 15)); - (r0, r1, r2, r3, r4, r5, r6, r7) <@ __butterfly64x (r0, r1, r2, r3, r4, - r5, r6, r7, zeta0, zeta0, zeta1, zeta1, qx16); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) (32 * 12) (r4))); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) (32 * 13) (r5))); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) (32 * 14) (r6))); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) (32 * 15) (r7))); - i <- 0; - while (i < 2) { - zeta0 <- - VPBROADCAST_8u32 (get32_direct - (WArray800.init16 (fun i_0 => (zetasp).[i_0])) - (8 + (392 * i))); - zeta1 <- - VPBROADCAST_8u32 (get32_direct - (WArray800.init16 (fun i_0 => (zetasp).[i_0])) - (12 + (392 * i))); - if ((i = 0)) { - r4 <- r0; - r5 <- r1; - r6 <- r2; - r7 <- r3; - } else { - r4 <- - (get256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) - ((32 * 4) + (256 * i))); - r5 <- - (get256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) - ((32 * 5) + (256 * i))); - r6 <- - (get256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) - ((32 * 6) + (256 * i))); - r7 <- - (get256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) - ((32 * 7) + (256 * i))); - } - r0 <- - (get256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) - ((32 * 0) + (256 * i))); - r1 <- - (get256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) - ((32 * 1) + (256 * i))); - r2 <- - (get256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) - ((32 * 2) + (256 * i))); - r3 <- - (get256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) - ((32 * 3) + (256 * i))); - (r0, r1, r2, r3, r4, r5, r6, r7) <@ __butterfly64x (r0, r1, r2, r3, r4, - r5, r6, r7, zeta0, zeta0, zeta1, zeta1, qx16); - zeta0 <- - (get256_direct (WArray800.init16 (fun i_0 => (zetasp).[i_0])) - (16 + (392 * i))); - zeta1 <- - (get256_direct (WArray800.init16 (fun i_0 => (zetasp).[i_0])) - (48 + (392 * i))); - (r0, r4) <@ __shuffle8 (r0, r4); - (r1, r5) <@ __shuffle8 (r1, r5); - (r2, r6) <@ __shuffle8 (r2, r6); - (r3, r7) <@ __shuffle8 (r3, r7); - (r0, r4, r1, r5, r2, r6, r3, r7) <@ __butterfly64x (r0, r4, r1, r5, r2, - r6, r3, r7, zeta0, zeta0, zeta1, zeta1, qx16); - zeta0 <- - (get256_direct (WArray800.init16 (fun i_0 => (zetasp).[i_0])) - (80 + (392 * i))); - zeta1 <- - (get256_direct (WArray800.init16 (fun i_0 => (zetasp).[i_0])) - (112 + (392 * i))); - (r0, r2) <@ __shuffle4 (r0, r2); - (r4, r6) <@ __shuffle4 (r4, r6); - (r1, r3) <@ __shuffle4 (r1, r3); - (r5, r7) <@ __shuffle4 (r5, r7); - (r0, r2, r4, r6, r1, r3, r5, r7) <@ __butterfly64x (r0, r2, r4, r6, r1, - r3, r5, r7, zeta0, zeta0, zeta1, zeta1, qx16); - zeta0 <- - (get256_direct (WArray800.init16 (fun i_0 => (zetasp).[i_0])) - (144 + (392 * i))); - zeta1 <- - (get256_direct (WArray800.init16 (fun i_0 => (zetasp).[i_0])) - (176 + (392 * i))); - (r0, r1) <@ __shuffle2 (r0, r1); - (r2, r3) <@ __shuffle2 (r2, r3); - (r4, r5) <@ __shuffle2 (r4, r5); - (r6, r7) <@ __shuffle2 (r6, r7); - (r0, r1, r2, r3, r4, r5, r6, r7) <@ __butterfly64x (r0, r1, r2, r3, r4, - r5, r6, r7, zeta0, zeta0, zeta1, zeta1, qx16); - zeta0 <- - (get256_direct (WArray800.init16 (fun i_0 => (zetasp).[i_0])) - (208 + (392 * i))); - zeta1 <- - (get256_direct (WArray800.init16 (fun i_0 => (zetasp).[i_0])) - (240 + (392 * i))); - (r0, r4) <@ __shuffle1 (r0, r4); - (r1, r5) <@ __shuffle1 (r1, r5); - (r2, r6) <@ __shuffle1 (r2, r6); - (r3, r7) <@ __shuffle1 (r3, r7); - (r0, r4, r1, r5, r2, r6, r3, r7) <@ __butterfly64x (r0, r4, r1, r5, r2, - r6, r3, r7, zeta0, zeta0, zeta1, zeta1, qx16); - zeta0 <- - (get256_direct (WArray800.init16 (fun i_0 => (zetasp).[i_0])) - (272 + (392 * i))); - zeta2 <- - (get256_direct (WArray800.init16 (fun i_0 => (zetasp).[i_0])) - (304 + (392 * i))); - zeta1 <- - (get256_direct (WArray800.init16 (fun i_0 => (zetasp).[i_0])) - (336 + (392 * i))); - zeta3 <- - (get256_direct (WArray800.init16 (fun i_0 => (zetasp).[i_0])) - (368 + (392 * i))); - (r0, r4, r2, r6, r1, r5, r3, r7) <@ __butterfly64x (r0, r4, r2, r6, r1, - r5, r3, r7, zeta0, zeta1, zeta2, zeta3, qx16); - vx16 <- (get256 (WArray32.init16 (fun i_0 => (jvx16).[i_0])) 0); - r0 <@ __red16x (r0, qx16, vx16); - r4 <@ __red16x (r4, qx16, vx16); - r2 <@ __red16x (r2, qx16, vx16); - r6 <@ __red16x (r6, qx16, vx16); - r1 <@ __red16x (r1, qx16, vx16); - r5 <@ __red16x (r5, qx16, vx16); - r3 <@ __red16x (r3, qx16, vx16); - r7 <@ __red16x (r7, qx16, vx16); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) ((32 * 0) + (256 * i)) (r0))); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) ((32 * 1) + (256 * i)) (r4))); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) ((32 * 2) + (256 * i)) (r1))); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) ((32 * 3) + (256 * i)) (r5))); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) ((32 * 4) + (256 * i)) (r2))); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) ((32 * 5) + (256 * i)) (r6))); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) ((32 * 6) + (256 * i)) (r3))); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) ((32 * 7) + (256 * i)) (r7))); - i <- i + 1; - } - return (rp); - } - - proc __poly_reduce (rp:W16.t Array256.t) : W16.t Array256.t = { - var aux: int; - - var qx16:W256.t; - var vx16:W256.t; - var i:int; - var r:W256.t; - - qx16 <- (get256 (WArray32.init16 (fun i_0 => (jqx16).[i_0])) 0); - vx16 <- (get256 (WArray32.init16 (fun i_0 => (jvx16).[i_0])) 0); - i <- 0; - while (i < 16) { - r <- - (get256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) (32 * i)); - r <@ __red16x (r, qx16, vx16); - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) (32 * i) (r))); - i <- i + 1; - } - return (rp); - } - - proc _poly_sub (rp:W16.t Array256.t, ap:W16.t Array256.t, - bp:W16.t Array256.t) : W16.t Array256.t = { - var aux: int; - - var i:int; - var a:W256.t; - var b:W256.t; - var r:W256.t; - - i <- 0; - while (i < 16) { - a <- - (get256_direct (WArray512.init16 (fun i_0 => (ap).[i_0])) (32 * i)); - b <- - (get256_direct (WArray512.init16 (fun i_0 => (bp).[i_0])) (32 * i)); - r <- VPSUB_16u16 a b; - rp <- - Array256.init - (WArray512.get16 (WArray512.set256_direct (WArray512.init16 (fun i_0 => (rp).[i_0])) (32 * i) (r))); - i <- i + 1; - } - return (rp); - } - - proc _poly_tobytes (rp:W64.t, a:W16.t Array256.t) : W16.t Array256.t = { - var aux: int; - - var jqx16_p:W16.t Array16.t; - var qx16:W256.t; - var i:int; - var t0:W256.t; - var t1:W256.t; - var t2:W256.t; - var t3:W256.t; - var t4:W256.t; - var t5:W256.t; - var t6:W256.t; - var t7:W256.t; - var tt:W256.t; - var ttt:W256.t; - jqx16_p <- witness; - jqx16_p <- jqx16; - qx16 <- (get256 (WArray32.init16 (fun i_0 => (jqx16_p).[i_0])) 0); - a <@ _poly_csubq (a); - i <- 0; - while (i < 2) { - t0 <- (get256 (WArray512.init16 (fun i_0 => (a).[i_0])) (8 * i)); - t1 <- (get256 (WArray512.init16 (fun i_0 => (a).[i_0])) ((8 * i) + 1)); - t2 <- (get256 (WArray512.init16 (fun i_0 => (a).[i_0])) ((8 * i) + 2)); - t3 <- (get256 (WArray512.init16 (fun i_0 => (a).[i_0])) ((8 * i) + 3)); - t4 <- (get256 (WArray512.init16 (fun i_0 => (a).[i_0])) ((8 * i) + 4)); - t5 <- (get256 (WArray512.init16 (fun i_0 => (a).[i_0])) ((8 * i) + 5)); - t6 <- (get256 (WArray512.init16 (fun i_0 => (a).[i_0])) ((8 * i) + 6)); - t7 <- (get256 (WArray512.init16 (fun i_0 => (a).[i_0])) ((8 * i) + 7)); - tt <- VPSLL_16u16 t1 (W8.of_int 12); - tt <- (tt `|` t0); - t0 <- VPSRL_16u16 t1 (W8.of_int 4); - t1 <- VPSLL_16u16 t2 (W8.of_int 8); - t0 <- (t0 `|` t1); - t1 <- VPSRL_16u16 t2 (W8.of_int 8); - t2 <- VPSLL_16u16 t3 (W8.of_int 4); - t1 <- (t1 `|` t2); - t2 <- VPSLL_16u16 t5 (W8.of_int 12); - t2 <- (t2 `|` t4); - t3 <- VPSRL_16u16 t5 (W8.of_int 4); - t4 <- VPSLL_16u16 t6 (W8.of_int 8); - t3 <- (t3 `|` t4); - t4 <- VPSRL_16u16 t6 (W8.of_int 8); - t5 <- VPSLL_16u16 t7 (W8.of_int 4); - t4 <- (t4 `|` t5); - (ttt, t0) <@ __shuffle1 (tt, t0); - (tt, t2) <@ __shuffle1 (t1, t2); - (t1, t4) <@ __shuffle1 (t3, t4); - (t3, tt) <@ __shuffle2 (ttt, tt); - (ttt, t0) <@ __shuffle2 (t1, t0); - (t1, t4) <@ __shuffle2 (t2, t4); - (t2, ttt) <@ __shuffle4 (t3, ttt); - (t3, tt) <@ __shuffle4 (t1, tt); - (t1, t4) <@ __shuffle4 (t0, t4); - (t0, t3) <@ __shuffle8 (t2, t3); - (t2, ttt) <@ __shuffle8 (t1, ttt); - (t1, t4) <@ __shuffle8 (tt, t4); - Glob.mem <- - storeW256 Glob.mem (W64.to_uint (rp + (W64.of_int (192 * i)))) (t0); - Glob.mem <- - storeW256 Glob.mem (W64.to_uint (rp + (W64.of_int ((192 * i) + 32)))) (t2); - Glob.mem <- - storeW256 Glob.mem (W64.to_uint (rp + (W64.of_int ((192 * i) + 64)))) (t1); - Glob.mem <- - storeW256 Glob.mem (W64.to_uint (rp + (W64.of_int ((192 * i) + 96)))) (t3); - Glob.mem <- - storeW256 Glob.mem (W64.to_uint (rp + (W64.of_int ((192 * i) + 128)))) (ttt); - Glob.mem <- - storeW256 Glob.mem (W64.to_uint (rp + (W64.of_int ((192 * i) + 160)))) (t4); - i <- i + 1; - } - return (a); - } - - proc _poly_tomsg_1 (rp:W8.t Array32.t, a:W16.t Array256.t) : W8.t Array32.t * - W16.t Array256.t = { - var aux: int; - - var px16:W16.t Array16.t; - var hq:W256.t; - var hhq:W256.t; - var i:int; - var f0:W256.t; - var f1:W256.t; - var g0:W256.t; - var g1:W256.t; - var c:W32.t; - px16 <- witness; - a <@ _poly_csubq (a); - px16 <- hqx16_m1; - hq <- (get256 (WArray32.init16 (fun i_0 => (px16).[i_0])) 0); - px16 <- hhqx16; - hhq <- (get256 (WArray32.init16 (fun i_0 => (px16).[i_0])) 0); - aux <- (256 %/ 32); - i <- 0; - while (i < aux) { - f0 <- (get256 (WArray512.init16 (fun i_0 => (a).[i_0])) (2 * i)); - f1 <- (get256 (WArray512.init16 (fun i_0 => (a).[i_0])) ((2 * i) + 1)); - f0 <- VPSUB_16u16 hq f0; - f1 <- VPSUB_16u16 hq f1; - g0 <- VPSRA_16u16 f0 (W8.of_int 15); - g1 <- VPSRA_16u16 f1 (W8.of_int 15); - f0 <- VPXOR_256 f0 g0; - f1 <- VPXOR_256 f1 g1; - f0 <- VPSUB_16u16 f0 hhq; - f1 <- VPSUB_16u16 f1 hhq; - f0 <- VPACKSS_16u16 f0 f1; - f0 <- VPERMQ f0 (W8.of_int 216); - c <- VPMOVMSKB_u256u32 f0; - rp <- - Array32.init - (WArray32.get8 (WArray32.set32 (WArray32.init8 (fun i_0 => (rp).[i_0])) i (c))); - i <- i + 1; - } - return (rp, a); - } - - proc __polyvec_add2 (r:W16.t Array768.t, b:W16.t Array768.t) : W16.t Array768.t = { - var aux: W16.t Array256.t; - - - - aux <@ _poly_add2 ((Array256.init (fun i => r.[0 + i])), - (Array256.init (fun i => b.[0 + i]))); - r <- Array768.init - (fun i => if 0 <= i < 0 + 256 then aux.[i-0] else r.[i]); - aux <@ _poly_add2 ((Array256.init (fun i => r.[256 + i])), - (Array256.init (fun i => b.[256 + i]))); - r <- Array768.init - (fun i => if 256 <= i < 256 + 256 then aux.[i-256] else r.[i]); - aux <@ _poly_add2 ((Array256.init (fun i => r.[(2 * 256) + i])), - (Array256.init (fun i => b.[(2 * 256) + i]))); - r <- Array768.init - (fun i => if (2 * 256) <= i < (2 * 256) + 256 then aux.[i-(2 * 256)] - else r.[i]); - return (r); - } - - proc __polyvec_csubq (r:W16.t Array768.t) : W16.t Array768.t = { - var aux: W16.t Array256.t; - - - - aux <@ _poly_csubq ((Array256.init (fun i => r.[0 + i]))); - r <- Array768.init - (fun i => if 0 <= i < 0 + 256 then aux.[i-0] else r.[i]); - aux <@ _poly_csubq ((Array256.init (fun i => r.[256 + i]))); - r <- Array768.init - (fun i => if 256 <= i < 256 + 256 then aux.[i-256] else r.[i]); - aux <@ _poly_csubq ((Array256.init (fun i => r.[(2 * 256) + i]))); - r <- Array768.init - (fun i => if (2 * 256) <= i < (2 * 256) + 256 then aux.[i-(2 * 256)] - else r.[i]); - return (r); - } - - proc __polyvec_decompress (rp:W64.t) : W16.t Array768.t = { - var aux: int; - - var r:W16.t Array768.t; - var q:W256.t; - var shufbidx:W256.t; - var sllvdidx:W256.t; - var mask:W256.t; - var i:int; - var k:int; - var f:W256.t; - r <- witness; - q <- VPBROADCAST_8u32 pvd_q_s; - shufbidx <- - (get256 (WArray32.init8 (fun i_0 => (pvd_shufbdidx_s).[i_0])) 0); - sllvdidx <- VPBROADCAST_4u64 pvd_sllvdidx_s; - mask <- VPBROADCAST_8u32 pvd_mask_s; - k <- 0; - while (k < 3) { - aux <- (256 %/ 16); - i <- 0; - while (i < aux) { - f <- - (loadW256 Glob.mem (W64.to_uint (rp + (W64.of_int ((320 * k) + (20 * i)))))); - f <- VPERMQ f (W8.of_int 148); - f <- VPSHUFB_256 f shufbidx; - f <- VPSLLV_8u32 f sllvdidx; - f <- VPSRL_16u16 f (W8.of_int 1); - f <- VPAND_256 f mask; - f <- VPMULHRS_16u16 f q; - r <- - Array768.init - (WArray1536.get16 (WArray1536.set256 (WArray1536.init16 (fun i_0 => (r).[i_0])) ((16 * k) + i) (f))); - i <- i + 1; - } - k <- k + 1; - } - return (r); - } - - proc __polyvec_compress (rp:W64.t, a:W16.t Array768.t) : unit = { - var aux: int; - - var x16p:W16.t Array16.t; - var v:W256.t; - var v8:W256.t; - var off:W256.t; - var shift1:W256.t; - var mask:W256.t; - var shift2:W256.t; - var sllvdidx:W256.t; - var shufbidx:W256.t; - var i:int; - var f0:W256.t; - var f1:W256.t; - var f2:W256.t; - var t0:W128.t; - var t1:W128.t; - x16p <- witness; - a <@ __polyvec_csubq (a); - x16p <- jvx16; - v <- (get256 (WArray32.init16 (fun i_0 => (x16p).[i_0])) 0); - v8 <- VPSLL_16u16 v (W8.of_int 3); - off <- VPBROADCAST_16u16 pvc_off_s; - shift1 <- VPBROADCAST_16u16 pvc_shift1_s; - mask <- VPBROADCAST_16u16 pvc_mask_s; - shift2 <- VPBROADCAST_4u64 pvc_shift2_s; - sllvdidx <- VPBROADCAST_4u64 pvc_sllvdidx_s; - shufbidx <- - (get256 (WArray32.init8 (fun i_0 => (pvc_shufbidx_s).[i_0])) 0); - aux <- ((3 * 256) %/ 16); - i <- 0; - while (i < aux) { - f0 <- (get256 (WArray1536.init16 (fun i_0 => (a).[i_0])) i); - f1 <- VPMULL_16u16 f0 v8; - f2 <- VPADD_16u16 f0 off; - f0 <- VPSLL_16u16 f0 (W8.of_int 3); - f0 <- VPMULH_16u16 f0 v; - f2 <- VPSUB_16u16 f1 f2; - f1 <- VPANDN_256 f1 f2; - f1 <- VPSRL_16u16 f1 (W8.of_int 15); - f0 <- VPSUB_16u16 f0 f1; - f0 <- VPMULHRS_16u16 f0 shift1; - f0 <- VPAND_256 f0 mask; - f0 <- VPMADDWD_256 f0 shift2; - f0 <- VPSLLV_8u32 f0 sllvdidx; - f0 <- VPSRL_4u64 f0 (W8.of_int 12); - f0 <- VPSHUFB_256 f0 shufbidx; - t0 <- (truncateu128 f0); - t1 <- VEXTRACTI128 f0 (W8.of_int 1); - t0 <- VPBLEND_8u16 t0 t1 (W8.of_int 224); - Glob.mem <- - storeW128 Glob.mem (W64.to_uint (rp + (W64.of_int (20 * i)))) (t0); - Glob.mem <- - storeW32 Glob.mem (W64.to_uint (rp + (W64.of_int ((20 * i) + 16)))) (VPEXTR_32 t1 - (W8.of_int 0)); - i <- i + 1; - } - return (); - } - - proc __polyvec_compress_1 (rp:W8.t Array960.t, a:W16.t Array768.t) : - W8.t Array960.t = { - var aux: int; - - var x16p:W16.t Array16.t; - var v:W256.t; - var v8:W256.t; - var off:W256.t; - var shift1:W256.t; - var mask:W256.t; - var shift2:W256.t; - var sllvdidx:W256.t; - var shufbidx:W256.t; - var i:int; - var f0:W256.t; - var f1:W256.t; - var f2:W256.t; - var t0:W128.t; - var t1:W128.t; - x16p <- witness; - a <@ __polyvec_csubq (a); - x16p <- jvx16; - v <- (get256 (WArray32.init16 (fun i_0 => (x16p).[i_0])) 0); - v8 <- VPSLL_16u16 v (W8.of_int 3); - off <- VPBROADCAST_16u16 pvc_off_s; - shift1 <- VPBROADCAST_16u16 pvc_shift1_s; - mask <- VPBROADCAST_16u16 pvc_mask_s; - shift2 <- VPBROADCAST_4u64 pvc_shift2_s; - sllvdidx <- VPBROADCAST_4u64 pvc_sllvdidx_s; - shufbidx <- - (get256 (WArray32.init8 (fun i_0 => (pvc_shufbidx_s).[i_0])) 0); - aux <- ((3 * 256) %/ 16); - i <- 0; - while (i < aux) { - f0 <- (get256 (WArray1536.init16 (fun i_0 => (a).[i_0])) i); - f1 <- VPMULL_16u16 f0 v8; - f2 <- VPADD_16u16 f0 off; - f0 <- VPSLL_16u16 f0 (W8.of_int 3); - f0 <- VPMULH_16u16 f0 v; - f2 <- VPSUB_16u16 f1 f2; - f1 <- VPANDN_256 f1 f2; - f1 <- VPSRL_16u16 f1 (W8.of_int 15); - f0 <- VPSUB_16u16 f0 f1; - f0 <- VPMULHRS_16u16 f0 shift1; - f0 <- VPAND_256 f0 mask; - f0 <- VPMADDWD_256 f0 shift2; - f0 <- VPSLLV_8u32 f0 sllvdidx; - f0 <- VPSRL_4u64 f0 (W8.of_int 12); - f0 <- VPSHUFB_256 f0 shufbidx; - t0 <- (truncateu128 f0); - t1 <- VEXTRACTI128 f0 (W8.of_int 1); - t0 <- VPBLEND_8u16 t0 t1 (W8.of_int 224); - rp <- - Array960.init - (WArray960.get8 (WArray960.set128_direct (WArray960.init8 (fun i_0 => (rp).[i_0])) (20 * i) (t0))); - rp <- - Array960.init - (WArray960.get8 (WArray960.set32_direct (WArray960.init8 (fun i_0 => (rp).[i_0])) ((20 * i) + 16) (VPEXTR_32 t1 - (W8.of_int 0)))); - i <- i + 1; - } - return (rp); - } - - proc __polyvec_frombytes (ap:W64.t) : W16.t Array768.t = { - var aux: W16.t Array256.t; - - var r:W16.t Array768.t; - var pp:W64.t; - r <- witness; - pp <- ap; - aux <@ _poly_frombytes ((Array256.init (fun i => r.[0 + i])), pp); - r <- Array768.init - (fun i => if 0 <= i < 0 + 256 then aux.[i-0] else r.[i]); - pp <- (pp + (W64.of_int 384)); - aux <@ _poly_frombytes ((Array256.init (fun i => r.[256 + i])), pp); - r <- Array768.init - (fun i => if 256 <= i < 256 + 256 then aux.[i-256] else r.[i]); - pp <- (pp + (W64.of_int 384)); - aux <@ _poly_frombytes ((Array256.init (fun i => r.[(2 * 256) + i])), - pp); - r <- Array768.init - (fun i => if (2 * 256) <= i < (2 * 256) + 256 then aux.[i-(2 * 256)] - else r.[i]); - return (r); - } - - proc __polyvec_invntt (r:W16.t Array768.t) : W16.t Array768.t = { - var aux: W16.t Array256.t; - - - - aux <@ _poly_invntt ((Array256.init (fun i => r.[0 + i]))); - r <- Array768.init - (fun i => if 0 <= i < 0 + 256 then aux.[i-0] else r.[i]); - aux <@ _poly_invntt ((Array256.init (fun i => r.[256 + i]))); - r <- Array768.init - (fun i => if 256 <= i < 256 + 256 then aux.[i-256] else r.[i]); - aux <@ _poly_invntt ((Array256.init (fun i => r.[(2 * 256) + i]))); - r <- Array768.init - (fun i => if (2 * 256) <= i < (2 * 256) + 256 then aux.[i-(2 * 256)] - else r.[i]); - return (r); - } - - proc __polyvec_ntt (r:W16.t Array768.t) : W16.t Array768.t = { - var aux: W16.t Array256.t; - - - - aux <@ _poly_ntt ((Array256.init (fun i => r.[0 + i]))); - r <- Array768.init - (fun i => if 0 <= i < 0 + 256 then aux.[i-0] else r.[i]); - aux <@ _poly_ntt ((Array256.init (fun i => r.[256 + i]))); - r <- Array768.init - (fun i => if 256 <= i < 256 + 256 then aux.[i-256] else r.[i]); - aux <@ _poly_ntt ((Array256.init (fun i => r.[(2 * 256) + i]))); - r <- Array768.init - (fun i => if (2 * 256) <= i < (2 * 256) + 256 then aux.[i-(2 * 256)] - else r.[i]); - return (r); - } - - proc __polyvec_reduce (r:W16.t Array768.t) : W16.t Array768.t = { - var aux: W16.t Array256.t; - - - - aux <@ __poly_reduce ((Array256.init (fun i => r.[0 + i]))); - r <- Array768.init - (fun i => if 0 <= i < 0 + 256 then aux.[i-0] else r.[i]); - aux <@ __poly_reduce ((Array256.init (fun i => r.[256 + i]))); - r <- Array768.init - (fun i => if 256 <= i < 256 + 256 then aux.[i-256] else r.[i]); - aux <@ __poly_reduce ((Array256.init (fun i => r.[(2 * 256) + i]))); - r <- Array768.init - (fun i => if (2 * 256) <= i < (2 * 256) + 256 then aux.[i-(2 * 256)] - else r.[i]); - return (r); - } - - proc __polyvec_pointwise_acc (r:W16.t Array256.t, a:W16.t Array768.t, - b:W16.t Array768.t) : W16.t Array256.t = { - - var t:W16.t Array256.t; - t <- witness; - r <@ _poly_basemul (r, (Array256.init (fun i => a.[0 + i])), - (Array256.init (fun i => b.[0 + i]))); - t <@ _poly_basemul (t, (Array256.init (fun i => a.[256 + i])), - (Array256.init (fun i => b.[256 + i]))); - r <@ _poly_add2 (r, t); - t <@ _poly_basemul (t, (Array256.init (fun i => a.[(2 * 256) + i])), - (Array256.init (fun i => b.[(2 * 256) + i]))); - r <@ _poly_add2 (r, t); - return (r); - } - - proc __polyvec_tobytes (rp:W64.t, a:W16.t Array768.t) : unit = { - var aux: W16.t Array256.t; - - var pp:W64.t; - - pp <- rp; - aux <@ _poly_tobytes (pp, (Array256.init (fun i => a.[0 + i]))); - a <- Array768.init - (fun i => if 0 <= i < 0 + 256 then aux.[i-0] else a.[i]); - pp <- (pp + (W64.of_int 384)); - aux <@ _poly_tobytes (pp, (Array256.init (fun i => a.[256 + i]))); - a <- Array768.init - (fun i => if 256 <= i < 256 + 256 then aux.[i-256] else a.[i]); - pp <- (pp + (W64.of_int 384)); - aux <@ _poly_tobytes (pp, (Array256.init (fun i => a.[(2 * 256) + i]))); - a <- Array768.init - (fun i => if (2 * 256) <= i < (2 * 256) + 256 then aux.[i-(2 * 256)] - else a.[i]); - return (); - } - - proc __rej_uniform (rp:W16.t Array256.t, offset:W64.t, buf:W8.t Array168.t) : - W64.t * W16.t Array256.t = { - - var ctr:W64.t; - var pos:W64.t; - var exit:W64.t; - var val1:W16.t; - var t:W16.t; - var val2:W16.t; - var cnd0:W64.t; - var cnd1:W64.t; - - ctr <- offset; - pos <- (W64.of_int 0); - exit <- (W64.of_int 0); - - while ((exit = (W64.of_int 0))) { - val1 <- (zeroextu16 buf.[(W64.to_uint pos)]); - pos <- (pos + (W64.of_int 1)); - t <- (zeroextu16 buf.[(W64.to_uint pos)]); - val2 <- t; - val2 <- (val2 `>>` (W8.of_int 4)); - t <- (t `&` (W16.of_int 15)); - t <- (t `<<` (W8.of_int 8)); - val1 <- (val1 `|` t); - pos <- (pos + (W64.of_int 1)); - t <- (zeroextu16 buf.[(W64.to_uint pos)]); - t <- (t `<<` (W8.of_int 4)); - val2 <- (val2 `|` t); - pos <- (pos + (W64.of_int 1)); - if ((val1 \ult (W16.of_int 3329))) { - rp.[(W64.to_uint ctr)] <- val1; - ctr <- (ctr + (W64.of_int 1)); - } else { - - } - if ((val2 \ult (W16.of_int 3329))) { - if ((ctr \ult (W64.of_int 256))) { - rp.[(W64.to_uint ctr)] <- val2; - ctr <- (ctr + (W64.of_int 1)); - } else { - - } - } else { - - } - cnd0 <- (W64.of_int 256); - cnd0 <- (cnd0 - ctr); - cnd0 <- (cnd0 - (W64.of_int 1)); - cnd1 <- (W64.of_int 168); - cnd1 <- (cnd1 - pos); - cnd1 <- (cnd1 - (W64.of_int 3)); - exit <- (cnd0 `|` cnd1); - exit <- (exit `>>` (W8.of_int 63)); - } - return (ctr, rp); - } - - proc __gen_matrix (seed:W8.t Array32.t, transposed:W64.t) : W16.t Array2304.t = { - var aux: int; - var aux_0: W16.t Array256.t; - - var r:W16.t Array2304.t; - var stransposed:W64.t; - var j:int; - var c:W8.t; - var extseed:W8.t Array34.t; - var i:int; - var state:W64.t Array25.t; - var ctr:W64.t; - var sctr:W64.t; - var buf:W8.t Array168.t; - var poly:W16.t Array256.t; - var k:W64.t; - var l:W64.t; - var t:W16.t; - buf <- witness; - extseed <- witness; - poly <- witness; - r <- witness; - state <- witness; - stransposed <- transposed; - j <- 0; - while (j < 32) { - c <- seed.[j]; - extseed.[j] <- c; - j <- j + 1; - } - i <- 0; - while (i < 3) { - j <- 0; - while (j < 3) { - transposed <- stransposed; - if ((transposed = (W64.of_int 0))) { - extseed.[32] <- (W8.of_int j); - extseed.[(32 + 1)] <- (W8.of_int i); - } else { - extseed.[32] <- (W8.of_int i); - extseed.[(32 + 1)] <- (W8.of_int j); - } - state <@ _shake128_absorb34 (state, extseed); - ctr <- (W64.of_int 0); - - while ((ctr \ult (W64.of_int 256))) { - sctr <- ctr; - (state, buf) <@ _shake128_squeezeblock (state, buf); - ctr <- sctr; - (ctr, poly) <@ __rej_uniform (poly, ctr, buf); - } - k <- (W64.of_int 0); - l <- (W64.of_int ((i * (3 * 256)) + (j * 256))); - - while ((k \ult (W64.of_int 256))) { - t <- poly.[(W64.to_uint k)]; - r.[(W64.to_uint l)] <- t; - k <- (k + (W64.of_int 1)); - l <- (l + (W64.of_int 1)); - } - j <- j + 1; - } - i <- i + 1; - } - i <- 0; - while (i < 3) { - j <- 0; - while (j < 3) { - aux_0 <@ _nttunpack ((Array256.init (fun i_0 => r.[((i * (3 * 256)) + (j * 256)) + i_0]))); - r <- Array2304.init - (fun i_0 => if ((i * (3 * 256)) + (j * 256)) <= i_0 < ((i * (3 * 256)) + (j * 256)) + 256 - then aux_0.[i_0-((i * (3 * 256)) + (j * 256))] else r.[i_0]); - j <- j + 1; - } - i <- i + 1; - } - return (r); - } - - proc __indcpa_keypair (pkp:W64.t, skp:W64.t, randomnessp:W8.t Array32.t) : unit = { - var aux: int; - var aux_3: W16.t Array256.t; - var aux_2: W16.t Array256.t; - var aux_1: W16.t Array256.t; - var aux_0: W16.t Array256.t; - - var spkp:W64.t; - var sskp:W64.t; - var i:int; - var t64:W64.t; - var inbuf:W8.t Array32.t; - var buf:W8.t Array64.t; - var publicseed:W8.t Array32.t; - var noiseseed:W8.t Array32.t; - var aa:W16.t Array2304.t; - var nonce:W8.t; - var skpv:W16.t Array768.t; - var e:W16.t Array768.t; - var pkpv:W16.t Array768.t; - aa <- witness; - buf <- witness; - e <- witness; - inbuf <- witness; - noiseseed <- witness; - pkpv <- witness; - publicseed <- witness; - skpv <- witness; - spkp <- pkp; - sskp <- skp; - aux <- (32 %/ 8); - i <- 0; - while (i < aux) { - t64 <- (get64 (WArray32.init8 (fun i_0 => (randomnessp).[i_0])) i); - inbuf <- - Array32.init - (WArray32.get8 (WArray32.set64 (WArray32.init8 (fun i_0 => (inbuf).[i_0])) i (t64))); - i <- i + 1; - } - buf <@ _sha3_512_32 (buf, inbuf); - aux <- (32 %/ 8); - i <- 0; - while (i < aux) { - t64 <- (get64 (WArray64.init8 (fun i_0 => (buf).[i_0])) i); - publicseed <- - Array32.init - (WArray32.get8 (WArray32.set64 (WArray32.init8 (fun i_0 => (publicseed).[i_0])) i (t64))); - t64 <- - (get64 (WArray64.init8 (fun i_0 => (buf).[i_0])) (i + (32 %/ 8))); - noiseseed <- - Array32.init - (WArray32.get8 (WArray32.set64 (WArray32.init8 (fun i_0 => (noiseseed).[i_0])) i (t64))); - i <- i + 1; - } - aa <@ __gen_matrix (publicseed, (W64.of_int 0)); - nonce <- (W8.of_int 0); - (aux_3, aux_2, aux_1, - aux_0) <@ _poly_getnoise_eta1_4x ((Array256.init (fun i_0 => skpv.[0 + i_0])), - (Array256.init (fun i_0 => skpv.[256 + i_0])), - (Array256.init (fun i_0 => skpv.[(2 * 256) + i_0])), - (Array256.init (fun i_0 => e.[0 + i_0])), noiseseed, nonce); - skpv <- Array768.init - (fun i_0 => if 0 <= i_0 < 0 + 256 then aux_3.[i_0-0] - else skpv.[i_0]); - skpv <- Array768.init - (fun i_0 => if 256 <= i_0 < 256 + 256 then aux_2.[i_0-256] - else skpv.[i_0]); - skpv <- Array768.init - (fun i_0 => if (2 * 256) <= i_0 < (2 * 256) + 256 - then aux_1.[i_0-(2 * 256)] else skpv.[i_0]); - e <- Array768.init - (fun i_0 => if 0 <= i_0 < 0 + 256 then aux_0.[i_0-0] else e.[i_0]); - nonce <- (W8.of_int 4); - (aux_3, aux_2, aux_1, - aux_0) <@ _poly_getnoise_eta1_4x ((Array256.init (fun i_0 => e.[256 + i_0])), - (Array256.init (fun i_0 => e.[(2 * 256) + i_0])), - (Array256.init (fun i_0 => pkpv.[0 + i_0])), - (Array256.init (fun i_0 => pkpv.[256 + i_0])), noiseseed, nonce); - e <- Array768.init - (fun i_0 => if 256 <= i_0 < 256 + 256 then aux_3.[i_0-256] - else e.[i_0]); - e <- Array768.init - (fun i_0 => if (2 * 256) <= i_0 < (2 * 256) + 256 - then aux_2.[i_0-(2 * 256)] else e.[i_0]); - pkpv <- Array768.init - (fun i_0 => if 0 <= i_0 < 0 + 256 then aux_1.[i_0-0] - else pkpv.[i_0]); - pkpv <- Array768.init - (fun i_0 => if 256 <= i_0 < 256 + 256 then aux_0.[i_0-256] - else pkpv.[i_0]); - skpv <@ __polyvec_ntt (skpv); - e <@ __polyvec_ntt (e); - i <- 0; - while (i < 3) { - aux_3 <@ __polyvec_pointwise_acc ((Array256.init (fun i_0 => pkpv.[(i * 256) + i_0])), - (Array768.init (fun i_0 => aa.[(i * (3 * 256)) + i_0])), skpv); - pkpv <- Array768.init - (fun i_0 => if (i * 256) <= i_0 < (i * 256) + 256 - then aux_3.[i_0-(i * 256)] else pkpv.[i_0]); - aux_3 <@ _poly_frommont ((Array256.init (fun i_0 => pkpv.[(i * 256) + i_0]))); - pkpv <- Array768.init - (fun i_0 => if (i * 256) <= i_0 < (i * 256) + 256 - then aux_3.[i_0-(i * 256)] else pkpv.[i_0]); - i <- i + 1; - } - pkpv <@ __polyvec_add2 (pkpv, e); - pkpv <@ __polyvec_reduce (pkpv); - pkp <- spkp; - skp <- sskp; - __polyvec_tobytes (skp, skpv); - __polyvec_tobytes (pkp, pkpv); - pkp <- (pkp + (W64.of_int (3 * 384))); - aux <- (32 %/ 8); - i <- 0; - while (i < aux) { - t64 <- (get64 (WArray32.init8 (fun i_0 => (publicseed).[i_0])) i); - Glob.mem <- - storeW64 Glob.mem (W64.to_uint (pkp + (W64.of_int 0))) (t64); - pkp <- (pkp + (W64.of_int 8)); - i <- i + 1; - } - return (); - } - - proc __indcpa_enc_0 (sctp:W64.t, msgp:W8.t Array32.t, pkp:W64.t, - noiseseed:W8.t Array32.t) : unit = { - var aux_3: int; - var aux_2: W16.t Array256.t; - var aux_1: W16.t Array256.t; - var aux_0: W16.t Array256.t; - var aux: W16.t Array256.t; - - var pkpv:W16.t Array768.t; - var i:W64.t; - var t64:W64.t; - var publicseed:W8.t Array32.t; - var k:W16.t Array256.t; - var s_noiseseed:W8.t Array32.t; - var aat:W16.t Array2304.t; - var lnoiseseed:W8.t Array32.t; - var nonce:W8.t; - var sp_0:W16.t Array768.t; - var ep:W16.t Array768.t; - var epp:W16.t Array256.t; - var bp:W16.t Array768.t; - var w:int; - var v:W16.t Array256.t; - var ctp:W64.t; - aat <- witness; - bp <- witness; - ep <- witness; - epp <- witness; - k <- witness; - lnoiseseed <- witness; - pkpv <- witness; - publicseed <- witness; - s_noiseseed <- witness; - sp_0 <- witness; - v <- witness; - pkpv <@ __polyvec_frombytes (pkp); - i <- (W64.of_int 0); - pkp <- (pkp + (W64.of_int (3 * 384))); - - while ((i \ult (W64.of_int (32 %/ 8)))) { - t64 <- (loadW64 Glob.mem (W64.to_uint (pkp + (W64.of_int 0)))); - publicseed <- - Array32.init - (WArray32.get8 (WArray32.set64_direct (WArray32.init8 (fun i_0 => (publicseed).[i_0])) (8 * (W64.to_uint i)) (t64))); - pkp <- (pkp + (W64.of_int 8)); - i <- (i + (W64.of_int 1)); - } - k <@ _poly_frommsg_1 (k, msgp); - s_noiseseed <- noiseseed; - aat <@ __gen_matrix (publicseed, (W64.of_int 1)); - lnoiseseed <- s_noiseseed; - nonce <- (W8.of_int 0); - (aux_2, aux_1, aux_0, - aux) <@ _poly_getnoise_eta1_4x ((Array256.init (fun i_0 => sp_0.[0 + i_0])), - (Array256.init (fun i_0 => sp_0.[256 + i_0])), - (Array256.init (fun i_0 => sp_0.[(2 * 256) + i_0])), - (Array256.init (fun i_0 => ep.[0 + i_0])), lnoiseseed, nonce); - sp_0 <- Array768.init - (fun i_0 => if 0 <= i_0 < 0 + 256 then aux_2.[i_0-0] - else sp_0.[i_0]); - sp_0 <- Array768.init - (fun i_0 => if 256 <= i_0 < 256 + 256 then aux_1.[i_0-256] - else sp_0.[i_0]); - sp_0 <- Array768.init - (fun i_0 => if (2 * 256) <= i_0 < (2 * 256) + 256 - then aux_0.[i_0-(2 * 256)] else sp_0.[i_0]); - ep <- Array768.init - (fun i_0 => if 0 <= i_0 < 0 + 256 then aux.[i_0-0] else ep.[i_0]); - nonce <- (W8.of_int 4); - (aux_2, aux_1, aux_0, - aux) <@ _poly_getnoise_eta1_4x ((Array256.init (fun i_0 => ep.[256 + i_0])), - (Array256.init (fun i_0 => ep.[(2 * 256) + i_0])), epp, - (Array256.init (fun i_0 => bp.[0 + i_0])), lnoiseseed, nonce); - ep <- Array768.init - (fun i_0 => if 256 <= i_0 < 256 + 256 then aux_2.[i_0-256] - else ep.[i_0]); - ep <- Array768.init - (fun i_0 => if (2 * 256) <= i_0 < (2 * 256) + 256 - then aux_1.[i_0-(2 * 256)] else ep.[i_0]); - epp <- aux_0; - bp <- Array768.init - (fun i_0 => if 0 <= i_0 < 0 + 256 then aux.[i_0-0] else bp.[i_0]); - sp_0 <@ __polyvec_ntt (sp_0); - w <- 0; - while (w < 3) { - aux_2 <@ __polyvec_pointwise_acc ((Array256.init (fun i_0 => bp.[(w * 256) + i_0])), - (Array768.init (fun i_0 => aat.[(w * (3 * 256)) + i_0])), sp_0); - bp <- Array768.init - (fun i_0 => if (w * 256) <= i_0 < (w * 256) + 256 - then aux_2.[i_0-(w * 256)] else bp.[i_0]); - w <- w + 1; - } - v <@ __polyvec_pointwise_acc (v, pkpv, sp_0); - bp <@ __polyvec_invntt (bp); - v <@ _poly_invntt (v); - bp <@ __polyvec_add2 (bp, ep); - v <@ _poly_add2 (v, epp); - v <@ _poly_add2 (v, k); - bp <@ __polyvec_reduce (bp); - v <@ __poly_reduce (v); - ctp <- sctp; - __polyvec_compress (ctp, bp); - ctp <- (ctp + (W64.of_int (3 * 320))); - v <@ _poly_compress (ctp, v); - return (); - } - - proc __indcpa_enc_1 (ctp:W8.t Array1088.t, msgp:W8.t Array32.t, pkp:W64.t, - noiseseed:W8.t Array32.t) : W8.t Array1088.t = { - var aux_3: int; - var aux_5: W8.t Array128.t; - var aux_4: W8.t Array960.t; - var aux_2: W16.t Array256.t; - var aux_1: W16.t Array256.t; - var aux_0: W16.t Array256.t; - var aux: W16.t Array256.t; - - var sctp:W8.t Array1088.t; - var pkpv:W16.t Array768.t; - var i:W64.t; - var t64:W64.t; - var publicseed:W8.t Array32.t; - var k:W16.t Array256.t; - var s_noiseseed:W8.t Array32.t; - var aat:W16.t Array2304.t; - var lnoiseseed:W8.t Array32.t; - var nonce:W8.t; - var sp_0:W16.t Array768.t; - var ep:W16.t Array768.t; - var epp:W16.t Array256.t; - var bp:W16.t Array768.t; - var w:int; - var v:W16.t Array256.t; - aat <- witness; - bp <- witness; - ep <- witness; - epp <- witness; - k <- witness; - lnoiseseed <- witness; - pkpv <- witness; - publicseed <- witness; - s_noiseseed <- witness; - sctp <- witness; - sp_0 <- witness; - v <- witness; - sctp <- ctp; - pkpv <@ __polyvec_frombytes (pkp); - i <- (W64.of_int 0); - pkp <- (pkp + (W64.of_int (3 * 384))); - - while ((i \ult (W64.of_int (32 %/ 8)))) { - t64 <- (loadW64 Glob.mem (W64.to_uint (pkp + (W64.of_int 0)))); - publicseed <- - Array32.init - (WArray32.get8 (WArray32.set64_direct (WArray32.init8 (fun i_0 => (publicseed).[i_0])) (8 * (W64.to_uint i)) (t64))); - pkp <- (pkp + (W64.of_int 8)); - i <- (i + (W64.of_int 1)); - } - k <@ _poly_frommsg_1 (k, msgp); - s_noiseseed <- noiseseed; - aat <@ __gen_matrix (publicseed, (W64.of_int 1)); - lnoiseseed <- s_noiseseed; - nonce <- (W8.of_int 0); - (aux_2, aux_1, aux_0, - aux) <@ _poly_getnoise_eta1_4x ((Array256.init (fun i_0 => sp_0.[0 + i_0])), - (Array256.init (fun i_0 => sp_0.[256 + i_0])), - (Array256.init (fun i_0 => sp_0.[(2 * 256) + i_0])), - (Array256.init (fun i_0 => ep.[0 + i_0])), lnoiseseed, nonce); - sp_0 <- Array768.init - (fun i_0 => if 0 <= i_0 < 0 + 256 then aux_2.[i_0-0] - else sp_0.[i_0]); - sp_0 <- Array768.init - (fun i_0 => if 256 <= i_0 < 256 + 256 then aux_1.[i_0-256] - else sp_0.[i_0]); - sp_0 <- Array768.init - (fun i_0 => if (2 * 256) <= i_0 < (2 * 256) + 256 - then aux_0.[i_0-(2 * 256)] else sp_0.[i_0]); - ep <- Array768.init - (fun i_0 => if 0 <= i_0 < 0 + 256 then aux.[i_0-0] else ep.[i_0]); - nonce <- (W8.of_int 4); - (aux_2, aux_1, aux_0, - aux) <@ _poly_getnoise_eta1_4x ((Array256.init (fun i_0 => ep.[256 + i_0])), - (Array256.init (fun i_0 => ep.[(2 * 256) + i_0])), epp, - (Array256.init (fun i_0 => bp.[0 + i_0])), lnoiseseed, nonce); - ep <- Array768.init - (fun i_0 => if 256 <= i_0 < 256 + 256 then aux_2.[i_0-256] - else ep.[i_0]); - ep <- Array768.init - (fun i_0 => if (2 * 256) <= i_0 < (2 * 256) + 256 - then aux_1.[i_0-(2 * 256)] else ep.[i_0]); - epp <- aux_0; - bp <- Array768.init - (fun i_0 => if 0 <= i_0 < 0 + 256 then aux.[i_0-0] else bp.[i_0]); - sp_0 <@ __polyvec_ntt (sp_0); - w <- 0; - while (w < 3) { - aux_2 <@ __polyvec_pointwise_acc ((Array256.init (fun i_0 => bp.[(w * 256) + i_0])), - (Array768.init (fun i_0 => aat.[(w * (3 * 256)) + i_0])), sp_0); - bp <- Array768.init - (fun i_0 => if (w * 256) <= i_0 < (w * 256) + 256 - then aux_2.[i_0-(w * 256)] else bp.[i_0]); - w <- w + 1; - } - v <@ __polyvec_pointwise_acc (v, pkpv, sp_0); - bp <@ __polyvec_invntt (bp); - v <@ _poly_invntt (v); - bp <@ __polyvec_add2 (bp, ep); - v <@ _poly_add2 (v, epp); - v <@ _poly_add2 (v, k); - bp <@ __polyvec_reduce (bp); - v <@ __poly_reduce (v); - ctp <- sctp; - aux_4 <@ __polyvec_compress_1 ((Array960.init (fun i_0 => ctp.[0 + i_0])), - bp); - ctp <- Array1088.init - (fun i_0 => if 0 <= i_0 < 0 + 960 then aux_4.[i_0-0] - else ctp.[i_0]); - (aux_5, - aux_2) <@ _poly_compress_1 ((Array128.init (fun i_0 => ctp.[(3 * 320) + i_0])), - v); - ctp <- Array1088.init - (fun i_0 => if (3 * 320) <= i_0 < (3 * 320) + 128 - then aux_5.[i_0-(3 * 320)] else ctp.[i_0]); - v <- aux_2; - return (ctp); - } - - proc __indcpa_dec_1 (msgp:W8.t Array32.t, ctp:W64.t, skp:W64.t) : W8.t Array32.t = { - - var bp:W16.t Array768.t; - var v:W16.t Array256.t; - var skpv:W16.t Array768.t; - var t:W16.t Array256.t; - var mp:W16.t Array256.t; - bp <- witness; - mp <- witness; - skpv <- witness; - t <- witness; - v <- witness; - bp <@ __polyvec_decompress (ctp); - ctp <- (ctp + (W64.of_int (3 * 320))); - v <@ _poly_decompress (v, ctp); - skpv <@ __polyvec_frombytes (skp); - bp <@ __polyvec_ntt (bp); - t <@ __polyvec_pointwise_acc (t, skpv, bp); - t <@ _poly_invntt (t); - mp <@ _poly_sub (mp, v, t); - mp <@ __poly_reduce (mp); - (msgp, mp) <@ _poly_tomsg_1 (msgp, mp); - return (msgp); - } - - proc __verify (ctp:W64.t, ctpc:W8.t Array1088.t) : W64.t = { - var aux: int; - - var cnd:W64.t; - var t64:W64.t; - var h:W256.t; - var i:int; - var f:W256.t; - var g:W256.t; - var zf:bool; - var off:int; - var t1:W8.t; - var t2:W8.t; - var _0:bool; - var _1:bool; - var _2:bool; - var _3:bool; - - cnd <- (W64.of_int 0); - t64 <- (W64.of_int 1); - h <- set0_256 ; - aux <- (((3 * 320) + 128) %/ 32); - i <- 0; - while (i < aux) { - f <- - (get256_direct (WArray1088.init8 (fun i_0 => (ctpc).[i_0])) (32 * i)); - g <- (loadW256 Glob.mem (W64.to_uint (ctp + (W64.of_int (32 * i))))); - f <- VPXOR_256 f g; - h <- VPOR_256 h f; - i <- i + 1; - } - ( _0, _1, _2, _3, zf) <- VPTEST_256 h h; - cnd <- ((! zf) ? t64 : cnd); - off <- ((((3 * 320) + 128) %/ 32) * 32); - aux <- ((3 * 320) + 128); - i <- off; - while (i < aux) { - t1 <- (get8_direct (WArray1088.init8 (fun i_0 => (ctpc).[i_0])) i); - t2 <- (loadW8 Glob.mem (W64.to_uint (ctp + (W64.of_int i)))); - t1 <- (t1 `^` t2); - t64 <- (zeroextu64 t1); - cnd <- (cnd `|` t64); - i <- i + 1; - } - cnd <- (- cnd); - cnd <- (cnd `>>` (W8.of_int 63)); - return (cnd); - } - - proc __cmov (dst:W8.t Array32.t, src:W64.t, cnd:W64.t) : W8.t Array32.t = { - var aux: int; - - var scnd:W64.t; - var m:W256.t; - var i:int; - var f:W256.t; - var g:W256.t; - var off:int; - var bcond:W8.t; - var t1:W8.t; - var t2:W8.t; - - cnd <- (- cnd); - scnd <- cnd; - m <- VPBROADCAST_4u64 scnd; - aux <- (32 %/ 32); - i <- 0; - while (i < aux) { - f <- - (get256_direct (WArray32.init8 (fun i_0 => (dst).[i_0])) (32 * i)); - g <- (loadW256 Glob.mem (W64.to_uint (src + (W64.of_int (32 * i))))); - f <- VPBLENDVB_256 f g m; - dst <- - Array32.init - (WArray32.get8 (WArray32.set256_direct (WArray32.init8 (fun i_0 => (dst).[i_0])) (32 * i) (f))); - i <- i + 1; - } - off <- ((32 %/ 32) * 32); - bcond <- (truncateu8 cnd); - i <- off; - while (i < 32) { - t1 <- (get8_direct (WArray32.init8 (fun i_0 => (dst).[i_0])) i); - t2 <- (loadW8 Glob.mem (W64.to_uint (src + (W64.of_int i)))); - t2 <- (t2 `^` t1); - t2 <- (t2 `&` (truncateu8 cnd)); - t1 <- (t1 `^` t2); - dst <- - Array32.init - (WArray32.get8 (WArray32.set8_direct (WArray32.init8 (fun i_0 => (dst).[i_0])) i (t1))); - i <- i + 1; - } - return (dst); - } - - proc __crypto_kem_keypair_jazz (pkp:W64.t, skp:W64.t, - randomnessp:W8.t Array64.t) : unit = { - var aux: int; - - var s_randomnessp:W8.t Array64.t; - var s_pkp:W64.t; - var s_skp:W64.t; - var randomnessp1:W8.t Array32.t; - var i:int; - var t64:W64.t; - var h_pk:W8.t Array32.t; - var randomnessp2:W8.t Array32.t; - h_pk <- witness; - randomnessp1 <- witness; - randomnessp2 <- witness; - s_randomnessp <- witness; - s_randomnessp <- randomnessp; - s_pkp <- pkp; - s_skp <- skp; - randomnessp1 <- (Array32.init (fun i_0 => randomnessp.[0 + i_0])); - __indcpa_keypair (pkp, skp, randomnessp1); - skp <- s_skp; - skp <- (skp + (W64.of_int (3 * 384))); - pkp <- s_pkp; - aux <- (((3 * 384) + 32) %/ 8); - i <- 0; - while (i < aux) { - t64 <- (loadW64 Glob.mem (W64.to_uint (pkp + (W64.of_int (8 * i))))); - Glob.mem <- - storeW64 Glob.mem (W64.to_uint (skp + (W64.of_int 0))) (t64); - skp <- (skp + (W64.of_int 8)); - i <- i + 1; - } - s_skp <- skp; - pkp <- s_pkp; - t64 <- (W64.of_int ((3 * 384) + 32)); - h_pk <@ _isha3_256 (h_pk, pkp, t64); - skp <- s_skp; - i <- 0; - while (i < 4) { - t64 <- (get64 (WArray32.init8 (fun i_0 => (h_pk).[i_0])) i); - Glob.mem <- - storeW64 Glob.mem (W64.to_uint (skp + (W64.of_int 0))) (t64); - skp <- (skp + (W64.of_int 8)); - i <- i + 1; - } - randomnessp <- s_randomnessp; - randomnessp2 <- (Array32.init (fun i_0 => randomnessp.[32 + i_0])); - aux <- (32 %/ 8); - i <- 0; - while (i < aux) { - t64 <- (get64 (WArray32.init8 (fun i_0 => (randomnessp2).[i_0])) i); - Glob.mem <- - storeW64 Glob.mem (W64.to_uint (skp + (W64.of_int 0))) (t64); - skp <- (skp + (W64.of_int 8)); - i <- i + 1; - } - return (); - } - - proc __crypto_kem_enc_jazz (ctp:W64.t, shkp:W64.t, pkp:W64.t, - randomnessp:W8.t Array32.t) : unit = { - var aux: int; - var aux_0: W8.t Array32.t; - - var s_pkp:W64.t; - var s_ctp:W64.t; - var s_shkp:W64.t; - var i:int; - var t64:W64.t; - var kr:W8.t Array64.t; - var buf:W8.t Array64.t; - buf <- witness; - kr <- witness; - s_pkp <- pkp; - s_ctp <- ctp; - s_shkp <- shkp; - aux <- (32 %/ 8); - i <- 0; - while (i < aux) { - t64 <- (get64 (WArray32.init8 (fun i_0 => (randomnessp).[i_0])) i); - kr <- - Array64.init - (WArray64.get8 (WArray64.set64 (WArray64.init8 (fun i_0 => (kr).[i_0])) i (t64))); - i <- i + 1; - } - t64 <- (W64.of_int 32); - aux_0 <@ _isha3_256_32 ((Array32.init (fun i_0 => buf.[0 + i_0])), - (Array32.init (fun i_0 => kr.[0 + i_0]))); - buf <- Array64.init - (fun i_0 => if 0 <= i_0 < 0 + 32 then aux_0.[i_0-0] - else buf.[i_0]); - pkp <- s_pkp; - t64 <- (W64.of_int ((3 * 384) + 32)); - aux_0 <@ _isha3_256 ((Array32.init (fun i_0 => buf.[32 + i_0])), pkp, - t64); - buf <- Array64.init - (fun i_0 => if 32 <= i_0 < 32 + 32 then aux_0.[i_0-32] - else buf.[i_0]); - kr <@ _sha3_512_64 (kr, buf); - pkp <- s_pkp; - __indcpa_enc_0 (s_ctp, (Array32.init (fun i_0 => buf.[0 + i_0])), pkp, - (Array32.init (fun i_0 => kr.[32 + i_0]))); - ctp <- s_ctp; - t64 <- (W64.of_int ((3 * 320) + 128)); - aux_0 <@ _isha3_256 ((Array32.init (fun i_0 => kr.[32 + i_0])), ctp, - t64); - kr <- Array64.init - (fun i_0 => if 32 <= i_0 < 32 + 32 then aux_0.[i_0-32] - else kr.[i_0]); - shkp <- s_shkp; - t64 <- (W64.of_int 32); - _shake256_64 (shkp, t64, kr); - return (); - } - - proc __crypto_kem_dec_jazz (shkp:W64.t, ctp:W64.t, skp:W64.t) : unit = { - var aux_0: int; - var aux: W8.t Array32.t; - - var s_shkp:W64.t; - var s_ctp:W64.t; - var buf:W8.t Array64.t; - var hp:W64.t; - var i:int; - var t64:W64.t; - var s_skp:W64.t; - var kr:W8.t Array64.t; - var pkp:W64.t; - var ctpc:W8.t Array1088.t; - var cnd:W64.t; - var zp:W64.t; - buf <- witness; - ctpc <- witness; - kr <- witness; - s_shkp <- shkp; - s_ctp <- ctp; - aux <@ __indcpa_dec_1 ((Array32.init (fun i_0 => buf.[0 + i_0])), ctp, - skp); - buf <- Array64.init - (fun i_0 => if 0 <= i_0 < 0 + 32 then aux.[i_0-0] else buf.[i_0]); - hp <- (skp + (W64.of_int 32)); - hp <- (hp + (W64.of_int (((24 * 3) * 256) `|>>` 3))); - aux_0 <- (32 %/ 8); - i <- 0; - while (i < aux_0) { - t64 <- (loadW64 Glob.mem (W64.to_uint (hp + (W64.of_int (8 * i))))); - buf <- - Array64.init - (WArray64.get8 (WArray64.set64_direct (WArray64.init8 (fun i_0 => (buf).[i_0])) (32 + (8 * i)) (t64))); - i <- i + 1; - } - s_skp <- skp; - kr <@ _sha3_512_64 (kr, buf); - pkp <- s_skp; - pkp <- (pkp + (W64.of_int (((12 * 3) * 256) `|>>` 3))); - ctpc <@ __indcpa_enc_1 (ctpc, (Array32.init (fun i_0 => buf.[0 + i_0])), - pkp, (Array32.init (fun i_0 => kr.[32 + i_0]))); - ctp <- s_ctp; - cnd <@ __verify (ctp, ctpc); - zp <- s_skp; - zp <- (zp + (W64.of_int 64)); - zp <- (zp + (W64.of_int (((24 * 3) * 256) `|>>` 3))); - aux <@ __cmov ((Array32.init (fun i_0 => kr.[0 + i_0])), zp, cnd); - kr <- Array64.init - (fun i_0 => if 0 <= i_0 < 0 + 32 then aux.[i_0-0] else kr.[i_0]); - t64 <- (W64.of_int ((3 * 320) + 128)); - aux <@ _isha3_256 ((Array32.init (fun i_0 => kr.[32 + i_0])), ctp, t64); - kr <- Array64.init - (fun i_0 => if 32 <= i_0 < 32 + 32 then aux.[i_0-32] - else kr.[i_0]); - shkp <- s_shkp; - t64 <- (W64.of_int 32); - _shake256_64 (shkp, t64, kr); - return (); - } - - proc jade_kem_kyber_kyber768_amd64_avx2v_keypair (public_key:W64.t, - secret_key:W64.t) : - W64.t = { - - var r:W64.t; - var randomness:W8.t Array64.t; - var randomnessp:W8.t Array64.t; - var _of_:bool; - var _cf_:bool; - var _sf_:bool; - var _zf_:bool; - var _0:bool; - randomness <- witness; - randomnessp <- witness; - public_key <- public_key; - secret_key <- secret_key; - randomnessp <- randomness; - randomnessp <@ SC.randombytes_64 (randomnessp); - __crypto_kem_keypair_jazz (public_key, secret_key, randomnessp); - (_of_, _cf_, _sf_, _0, _zf_, r) <- set0_64 ; - return (r); - } - - proc jade_kem_kyber_kyber768_amd64_avx2v_enc (ciphertext:W64.t, - shared_secret:W64.t, - public_key:W64.t) : W64.t = { - - var r:W64.t; - var randomness:W8.t Array32.t; - var randomnessp:W8.t Array32.t; - var _of_:bool; - var _cf_:bool; - var _sf_:bool; - var _zf_:bool; - var _0:bool; - randomness <- witness; - randomnessp <- witness; - ciphertext <- ciphertext; - shared_secret <- shared_secret; - public_key <- public_key; - randomnessp <- randomness; - randomnessp <@ SC.randombytes_32 (randomnessp); - __crypto_kem_enc_jazz (ciphertext, shared_secret, public_key, - randomnessp); - (_of_, _cf_, _sf_, _0, _zf_, r) <- set0_64 ; - return (r); - } - - proc jade_kem_kyber_kyber768_amd64_avx2v_dec (shared_secret:W64.t, - ciphertext:W64.t, - secret_key:W64.t) : W64.t = { - - var r:W64.t; - var _of_:bool; - var _cf_:bool; - var _sf_:bool; - var _zf_:bool; - var _0:bool; - - __crypto_kem_dec_jazz (shared_secret, ciphertext, secret_key); - (_of_, _cf_, _sf_, _0, _zf_, r) <- set0_64 ; - return (r); - } -}. - diff --git a/code/jasmin/avx2v/fips202.c b/code/jasmin/avx2v/fips202.c deleted file mode 100644 index d300328b..00000000 --- a/code/jasmin/avx2v/fips202.c +++ /dev/null @@ -1,549 +0,0 @@ -/* Based on the public domain implementation in - * crypto_hash/keccakc512/simple/ from http://bench.cr.yp.to/supercop.html - * by Ronny Van Keer - * and the public domain "TweetFips202" implementation - * from https://twitter.com/tweetfips202 - * by Gilles Van Assche, Daniel J. Bernstein, and Peter Schwabe */ - -#include -#include -#include "fips202.h" - -#define NROUNDS 24 -#define ROL(a, offset) ((a << offset) ^ (a >> (64-offset))) - -/************************************************* -* Name: load64 -* -* Description: Load 8 bytes into uint64_t in little-endian order -* -* Arguments: - const unsigned char *x: pointer to input byte array -* -* Returns the loaded 64-bit unsigned integer -**************************************************/ -static uint64_t load64(const unsigned char *x) -{ - unsigned long long r = 0, i; - - for (i = 0; i < 8; ++i) { - r |= (unsigned long long)x[i] << 8 * i; - } - return r; -} - -/************************************************* -* Name: store64 -* -* Description: Store a 64-bit integer to a byte array in little-endian order -* -* Arguments: - uint8_t *x: pointer to the output byte array -* - uint64_t u: input 64-bit unsigned integer -**************************************************/ -static void store64(uint8_t *x, uint64_t u) -{ - unsigned int i; - - for(i=0; i<8; ++i) { - x[i] = u; - u >>= 8; - } -} - -/* Keccak round constants */ -static const uint64_t KeccakF_RoundConstants[NROUNDS] = -{ - (uint64_t)0x0000000000000001ULL, - (uint64_t)0x0000000000008082ULL, - (uint64_t)0x800000000000808aULL, - (uint64_t)0x8000000080008000ULL, - (uint64_t)0x000000000000808bULL, - (uint64_t)0x0000000080000001ULL, - (uint64_t)0x8000000080008081ULL, - (uint64_t)0x8000000000008009ULL, - (uint64_t)0x000000000000008aULL, - (uint64_t)0x0000000000000088ULL, - (uint64_t)0x0000000080008009ULL, - (uint64_t)0x000000008000000aULL, - (uint64_t)0x000000008000808bULL, - (uint64_t)0x800000000000008bULL, - (uint64_t)0x8000000000008089ULL, - (uint64_t)0x8000000000008003ULL, - (uint64_t)0x8000000000008002ULL, - (uint64_t)0x8000000000000080ULL, - (uint64_t)0x000000000000800aULL, - (uint64_t)0x800000008000000aULL, - (uint64_t)0x8000000080008081ULL, - (uint64_t)0x8000000000008080ULL, - (uint64_t)0x0000000080000001ULL, - (uint64_t)0x8000000080008008ULL -}; - -/************************************************* -* Name: KeccakF1600_StatePermute -* -* Description: The Keccak F1600 Permutation -* -* Arguments: - uint64_t * state: pointer to in/output Keccak state -**************************************************/ -static void KeccakF1600_StatePermute(uint64_t * state) -{ - int round; - - uint64_t Aba, Abe, Abi, Abo, Abu; - uint64_t Aga, Age, Agi, Ago, Agu; - uint64_t Aka, Ake, Aki, Ako, Aku; - uint64_t Ama, Ame, Ami, Amo, Amu; - uint64_t Asa, Ase, Asi, Aso, Asu; - uint64_t BCa, BCe, BCi, BCo, BCu; - uint64_t Da, De, Di, Do, Du; - uint64_t Eba, Ebe, Ebi, Ebo, Ebu; - uint64_t Ega, Ege, Egi, Ego, Egu; - uint64_t Eka, Eke, Eki, Eko, Eku; - uint64_t Ema, Eme, Emi, Emo, Emu; - uint64_t Esa, Ese, Esi, Eso, Esu; - - //copyFromState(A, state) - Aba = state[ 0]; - Abe = state[ 1]; - Abi = state[ 2]; - Abo = state[ 3]; - Abu = state[ 4]; - Aga = state[ 5]; - Age = state[ 6]; - Agi = state[ 7]; - Ago = state[ 8]; - Agu = state[ 9]; - Aka = state[10]; - Ake = state[11]; - Aki = state[12]; - Ako = state[13]; - Aku = state[14]; - Ama = state[15]; - Ame = state[16]; - Ami = state[17]; - Amo = state[18]; - Amu = state[19]; - Asa = state[20]; - Ase = state[21]; - Asi = state[22]; - Aso = state[23]; - Asu = state[24]; - - for( round = 0; round < NROUNDS; round += 2 ) - { - // prepareTheta - BCa = Aba^Aga^Aka^Ama^Asa; - BCe = Abe^Age^Ake^Ame^Ase; - BCi = Abi^Agi^Aki^Ami^Asi; - BCo = Abo^Ago^Ako^Amo^Aso; - BCu = Abu^Agu^Aku^Amu^Asu; - - //thetaRhoPiChiIotaPrepareTheta(round , A, E) - Da = BCu^ROL(BCe, 1); - De = BCa^ROL(BCi, 1); - Di = BCe^ROL(BCo, 1); - Do = BCi^ROL(BCu, 1); - Du = BCo^ROL(BCa, 1); - - Aba ^= Da; - BCa = Aba; - Age ^= De; - BCe = ROL(Age, 44); - Aki ^= Di; - BCi = ROL(Aki, 43); - Amo ^= Do; - BCo = ROL(Amo, 21); - Asu ^= Du; - BCu = ROL(Asu, 14); - Eba = BCa ^((~BCe)& BCi ); - Eba ^= (uint64_t)KeccakF_RoundConstants[round]; - Ebe = BCe ^((~BCi)& BCo ); - Ebi = BCi ^((~BCo)& BCu ); - Ebo = BCo ^((~BCu)& BCa ); - Ebu = BCu ^((~BCa)& BCe ); - - Abo ^= Do; - BCa = ROL(Abo, 28); - Agu ^= Du; - BCe = ROL(Agu, 20); - Aka ^= Da; - BCi = ROL(Aka, 3); - Ame ^= De; - BCo = ROL(Ame, 45); - Asi ^= Di; - BCu = ROL(Asi, 61); - Ega = BCa ^((~BCe)& BCi ); - Ege = BCe ^((~BCi)& BCo ); - Egi = BCi ^((~BCo)& BCu ); - Ego = BCo ^((~BCu)& BCa ); - Egu = BCu ^((~BCa)& BCe ); - - Abe ^= De; - BCa = ROL(Abe, 1); - Agi ^= Di; - BCe = ROL(Agi, 6); - Ako ^= Do; - BCi = ROL(Ako, 25); - Amu ^= Du; - BCo = ROL(Amu, 8); - Asa ^= Da; - BCu = ROL(Asa, 18); - Eka = BCa ^((~BCe)& BCi ); - Eke = BCe ^((~BCi)& BCo ); - Eki = BCi ^((~BCo)& BCu ); - Eko = BCo ^((~BCu)& BCa ); - Eku = BCu ^((~BCa)& BCe ); - - Abu ^= Du; - BCa = ROL(Abu, 27); - Aga ^= Da; - BCe = ROL(Aga, 36); - Ake ^= De; - BCi = ROL(Ake, 10); - Ami ^= Di; - BCo = ROL(Ami, 15); - Aso ^= Do; - BCu = ROL(Aso, 56); - Ema = BCa ^((~BCe)& BCi ); - Eme = BCe ^((~BCi)& BCo ); - Emi = BCi ^((~BCo)& BCu ); - Emo = BCo ^((~BCu)& BCa ); - Emu = BCu ^((~BCa)& BCe ); - - Abi ^= Di; - BCa = ROL(Abi, 62); - Ago ^= Do; - BCe = ROL(Ago, 55); - Aku ^= Du; - BCi = ROL(Aku, 39); - Ama ^= Da; - BCo = ROL(Ama, 41); - Ase ^= De; - BCu = ROL(Ase, 2); - Esa = BCa ^((~BCe)& BCi ); - Ese = BCe ^((~BCi)& BCo ); - Esi = BCi ^((~BCo)& BCu ); - Eso = BCo ^((~BCu)& BCa ); - Esu = BCu ^((~BCa)& BCe ); - - // prepareTheta - BCa = Eba^Ega^Eka^Ema^Esa; - BCe = Ebe^Ege^Eke^Eme^Ese; - BCi = Ebi^Egi^Eki^Emi^Esi; - BCo = Ebo^Ego^Eko^Emo^Eso; - BCu = Ebu^Egu^Eku^Emu^Esu; - - //thetaRhoPiChiIotaPrepareTheta(round+1, E, A) - Da = BCu^ROL(BCe, 1); - De = BCa^ROL(BCi, 1); - Di = BCe^ROL(BCo, 1); - Do = BCi^ROL(BCu, 1); - Du = BCo^ROL(BCa, 1); - - Eba ^= Da; - BCa = Eba; - Ege ^= De; - BCe = ROL(Ege, 44); - Eki ^= Di; - BCi = ROL(Eki, 43); - Emo ^= Do; - BCo = ROL(Emo, 21); - Esu ^= Du; - BCu = ROL(Esu, 14); - Aba = BCa ^((~BCe)& BCi ); - Aba ^= (uint64_t)KeccakF_RoundConstants[round+1]; - Abe = BCe ^((~BCi)& BCo ); - Abi = BCi ^((~BCo)& BCu ); - Abo = BCo ^((~BCu)& BCa ); - Abu = BCu ^((~BCa)& BCe ); - - Ebo ^= Do; - BCa = ROL(Ebo, 28); - Egu ^= Du; - BCe = ROL(Egu, 20); - Eka ^= Da; - BCi = ROL(Eka, 3); - Eme ^= De; - BCo = ROL(Eme, 45); - Esi ^= Di; - BCu = ROL(Esi, 61); - Aga = BCa ^((~BCe)& BCi ); - Age = BCe ^((~BCi)& BCo ); - Agi = BCi ^((~BCo)& BCu ); - Ago = BCo ^((~BCu)& BCa ); - Agu = BCu ^((~BCa)& BCe ); - - Ebe ^= De; - BCa = ROL(Ebe, 1); - Egi ^= Di; - BCe = ROL(Egi, 6); - Eko ^= Do; - BCi = ROL(Eko, 25); - Emu ^= Du; - BCo = ROL(Emu, 8); - Esa ^= Da; - BCu = ROL(Esa, 18); - Aka = BCa ^((~BCe)& BCi ); - Ake = BCe ^((~BCi)& BCo ); - Aki = BCi ^((~BCo)& BCu ); - Ako = BCo ^((~BCu)& BCa ); - Aku = BCu ^((~BCa)& BCe ); - - Ebu ^= Du; - BCa = ROL(Ebu, 27); - Ega ^= Da; - BCe = ROL(Ega, 36); - Eke ^= De; - BCi = ROL(Eke, 10); - Emi ^= Di; - BCo = ROL(Emi, 15); - Eso ^= Do; - BCu = ROL(Eso, 56); - Ama = BCa ^((~BCe)& BCi ); - Ame = BCe ^((~BCi)& BCo ); - Ami = BCi ^((~BCo)& BCu ); - Amo = BCo ^((~BCu)& BCa ); - Amu = BCu ^((~BCa)& BCe ); - - Ebi ^= Di; - BCa = ROL(Ebi, 62); - Ego ^= Do; - BCe = ROL(Ego, 55); - Eku ^= Du; - BCi = ROL(Eku, 39); - Ema ^= Da; - BCo = ROL(Ema, 41); - Ese ^= De; - BCu = ROL(Ese, 2); - Asa = BCa ^((~BCe)& BCi ); - Ase = BCe ^((~BCi)& BCo ); - Asi = BCi ^((~BCo)& BCu ); - Aso = BCo ^((~BCu)& BCa ); - Asu = BCu ^((~BCa)& BCe ); - } - - //copyToState(state, A) - state[ 0] = Aba; - state[ 1] = Abe; - state[ 2] = Abi; - state[ 3] = Abo; - state[ 4] = Abu; - state[ 5] = Aga; - state[ 6] = Age; - state[ 7] = Agi; - state[ 8] = Ago; - state[ 9] = Agu; - state[10] = Aka; - state[11] = Ake; - state[12] = Aki; - state[13] = Ako; - state[14] = Aku; - state[15] = Ama; - state[16] = Ame; - state[17] = Ami; - state[18] = Amo; - state[19] = Amu; - state[20] = Asa; - state[21] = Ase; - state[22] = Asi; - state[23] = Aso; - state[24] = Asu; - - #undef round -} - -#include -#define MIN(a, b) ((a) < (b) ? (a) : (b)) - - -/************************************************* -* Name: keccak_absorb -* -* Description: Absorb step of Keccak; -* non-incremental, starts by zeroeing the state. -* -* Arguments: - uint64_t *s: pointer to (uninitialized) output Keccak state -* - unsigned int r: rate in bytes (e.g., 168 for SHAKE128) -* - const unsigned char *m: pointer to input to be absorbed into s -* - unsigned long long mlen: length of input in bytes -* - unsigned char p: domain-separation byte for different Keccak-derived functions -**************************************************/ -static void keccak_absorb(uint64_t *s, - unsigned int r, - const unsigned char *m, unsigned long long int mlen, - unsigned char p) -{ - unsigned long long i; - unsigned char t[200]; - - // Zero state - for (i = 0; i < 25; ++i) - s[i] = 0; - - while (mlen >= r) - { - for (i = 0; i < r / 8; ++i) - s[i] ^= load64(m + 8 * i); - - KeccakF1600_StatePermute(s); - mlen -= r; - m += r; - } - - for (i = 0; i < r; ++i) - t[i] = 0; - for (i = 0; i < mlen; ++i) - t[i] = m[i]; - t[i] = p; - t[r - 1] |= 128; - for (i = 0; i < r / 8; ++i) - s[i] ^= load64(t + 8 * i); -} - - -/************************************************* -* Name: keccak_squeezeblocks -* -* Description: Squeeze step of Keccak. Squeezes full blocks of r bytes each. -* Modifies the state. Can be called multiple times to keep squeezing, -* i.e., is incremental. -* -* Arguments: - unsigned char *h: pointer to output blocks -* - unsigned long long int nblocks: number of blocks to be squeezed (written to h) -* - uint64_t *s: pointer to in/output Keccak state -* - unsigned int r: rate in bytes (e.g., 168 for SHAKE128) -**************************************************/ -static void keccak_squeezeblocks(unsigned char *h, unsigned long long int nblocks, - uint64_t *s, - unsigned int r) -{ - unsigned int i; - while(nblocks > 0) - { - KeccakF1600_StatePermute(s); - for(i=0;i<(r>>3);i++) - { - store64(h+8*i, s[i]); - } - h += r; - nblocks--; - } -} - - -/************************************************* -* Name: shake128_absorb -* -* Description: Absorb step of the SHAKE128 XOF. -* non-incremental, starts by zeroeing the state. -* -* Arguments: - uint64_t *s: pointer to (uninitialized) output Keccak state -* - const unsigned char *input: pointer to input to be absorbed into s -* - unsigned long long inputByteLen: length of input in bytes -**************************************************/ -void shake128_absorb(uint64_t *s, const unsigned char *input, unsigned int inputByteLen) -{ - keccak_absorb(s, SHAKE128_RATE, input, inputByteLen, 0x1F); -} - -/************************************************* -* Name: shake128_squeezeblocks -* -* Description: Squeeze step of SHAKE128 XOF. Squeezes full blocks of SHAKE128_RATE bytes each. -* Modifies the state. Can be called multiple times to keep squeezing, -* i.e., is incremental. -* -* Arguments: - unsigned char *output: pointer to output blocks -* - unsigned long long nblocks: number of blocks to be squeezed (written to output) -* - uint64_t *s: pointer to in/output Keccak state -**************************************************/ -void shake128_squeezeblocks(unsigned char *output, unsigned long long nblocks, uint64_t *s) -{ - keccak_squeezeblocks(output, nblocks, s, SHAKE128_RATE); -} - -/************************************************* -* Name: shake256 -* -* Description: SHAKE256 XOF with non-incremental API -* -* Arguments: - unsigned char *output: pointer to output -* - unsigned long long outlen: requested output length in bytes - - const unsigned char *input: pointer to input - - unsigned long long inlen: length of input in bytes -**************************************************/ -void shake256(unsigned char *output, unsigned long long outlen, - const unsigned char *input, unsigned long long inlen) -{ - uint64_t s[25]; - unsigned char t[SHAKE256_RATE]; - unsigned long long nblocks = outlen/SHAKE256_RATE; - size_t i; - - /* Absorb input */ - keccak_absorb(s, SHAKE256_RATE, input, inlen, 0x1F); - - /* Squeeze output */ - keccak_squeezeblocks(output, nblocks, s, SHAKE256_RATE); - - output+=nblocks*SHAKE256_RATE; - outlen-=nblocks*SHAKE256_RATE; - - if(outlen) - { - keccak_squeezeblocks(t, 1, s, SHAKE256_RATE); - for(i=0;i - -#define SHAKE128_RATE 168 -#define SHAKE256_RATE 136 -#define SHA3_256_RATE 136 -#define SHA3_512_RATE 72 - -void shake128_absorb(uint64_t *s, const unsigned char *input, unsigned int inputByteLen); -void shake128_squeezeblocks(unsigned char *output, unsigned long long nblocks, uint64_t *s); - -void shake256(unsigned char *output, unsigned long long outlen, const unsigned char *input, unsigned long long inlen); - -void sha3_256(unsigned char *output, const unsigned char *input, unsigned long long inlen); -void sha3_512(unsigned char *output, const unsigned char *input, unsigned long long inlen); - - - -void shake256_128_33_jazz(unsigned char *output, const unsigned char *input); -void sha3_512_32_jazz(unsigned char *output, const unsigned char *input); - -void shake128_absorb34_jazz(uint64_t *s, const unsigned char *input); -void shake128_squeezeblock_jazz(unsigned char *output, uint64_t *s); - - -#endif diff --git a/code/jasmin/avx2v/fips202.jinc b/code/jasmin/avx2v/fips202.jinc deleted file mode 100644 index 3ef81807..00000000 --- a/code/jasmin/avx2v/fips202.jinc +++ /dev/null @@ -1,587 +0,0 @@ -require "params.jinc" -require "keccakf1600.jinc" -require "fips202_common.jinc" - -inline -fn __st0(reg ptr u64[25] state) -> reg ptr u64[25] -{ - inline int i; - - for i = 0 to 25 { - state[i] = 0; - } - - return state; -} - - -inline -fn __add_full_block( - stack u64[25] state, - reg u64 in, - reg u64 inlen, - reg u64 r8 -) -> stack u64[25], reg u64, reg u64 -{ - reg u64 i t r64; - - r64 = r8; - r64 >>= 3; - i = 0; - while (i < r64) - { - t = [in + 8 * i]; - state[(int) i] ^= t; - i = i + 1; - } - - in += r8; - inlen -= r8; - - return state, in, inlen; -} - - -inline -fn __add_final_block( - stack u64[25] state, - reg u64 in, - reg u64 inlen, - reg u8 trail_byte, - reg u64 r8 -) -> stack u64[25] -{ - reg u64 i, t, inlen8; - reg u8 c; - - inlen8 = inlen; - inlen8 >>= 3; - i = 0; - while ( i < inlen8) - { - t = [in + 8*i]; - state[(int) i] ^= t; - i = i + 1; - } - - i <<= 3; - while (i < inlen) - { - c = (u8)[in + i]; - state[u8 (int) i] ^= c; - i = i + 1; - } - - state[u8 (int) i] ^= trail_byte; - - i = r8; - i -= 1; - state[u8 (int) i] ^= 0x80; - - return state; -} - - -inline -fn __xtr_full_block( - stack u64[25] state, - reg u64 out, - reg u64 outlen, - reg u64 rate -) -> reg u64, reg u64 -{ - reg u64 i t rate64; - - rate64 = rate; - rate64 >>= 3; - i = 0; - while (i < rate64) - { - t = state[(int) i]; - [out + 8 * i] = t; - i = i + 1; - } - - out += rate; - outlen -= rate; - - return out, outlen; -} - - -inline -fn __xtr_bytes( - stack u64[25] state, - reg u64 out, - reg u64 outlen -) -{ - reg u64 i t outlen8; - reg u8 c; - - outlen8 = outlen; - outlen8 >>= 3; - i = 0; - while (i < outlen8 ) - { - t = state[(int) i]; - [out + 8 * i] = t; - i = i + 1; - } - i <<= 3; - - while (i < outlen) - { - c = state[u8 (int) i]; - (u8)[out + i] = c; - i = i + 1; - } -} - - -inline -fn __keccak1600_scalar( - stack u64 s_out s_outlen, - reg u64 in inlen, - stack u64 s_trail_byte, - reg u64 rate -) -{ - stack u64[25] state; - stack u64 s_in, s_inlen, s_rate; - reg u64 out, outlen, t; - reg u8 trail_byte; - - state = __st0(state); - - while ( inlen >= rate ) - { - state, in, inlen = __add_full_block(state, in, inlen, rate); - - s_in = in; - s_inlen = inlen; - s_rate = rate; - - state = _keccakf1600_scalar(state); - - inlen = s_inlen; - in = s_in; - rate = s_rate; - } - - t = s_trail_byte; - trail_byte = (8u) t; - state = __add_final_block(state, in, inlen, trail_byte, rate); - - outlen = s_outlen; - - while ( outlen > rate ) - { - s_outlen = outlen; - s_rate = rate; - - state = _keccakf1600_scalar(state); - - out = s_out; - outlen = s_outlen; - rate = s_rate; - - out, outlen = __xtr_full_block(state, out, outlen, rate); - s_outlen = outlen; - s_out = out; - } - - state = _keccakf1600_scalar(state); - out = s_out; - outlen = s_outlen; - - __xtr_bytes(state, out, outlen); -} - - -#[returnaddress="stack"] -fn _shake256(reg u64 out outlen in inlen) -{ - stack u64 ds; - stack u64 rate; - - ds = 0x1f; - rate = SHAKE256_RATE; - - __keccak1600_scalar(out, outlen, in, inlen, ds, rate); -} - - -#[returnaddress="stack"] -fn _sha3_512(reg u64 out in inlen) -{ - reg u64 ds; - reg u64 rate; - reg u64 outlen; - - ds = 0x06; - rate = SHA3_512_RATE; - outlen = 64; - - __keccak1600_scalar(out, outlen, in, inlen, ds, rate); -} - - -#[returnaddress="stack"] -fn _sha3_256(reg u64 out in inlen) -{ - reg u64 ds; - reg u64 rate; - reg u64 outlen; - - ds = 0x06; - rate = SHA3_256_RATE; - outlen = 32; - - __keccak1600_scalar(out, outlen, in, inlen, ds, rate); -} - - -#[returnaddress="stack"] -fn _isha3_256(reg ptr u8[32] out, reg u64 in inlen) -> reg ptr u8[32] -{ - stack u64[25] state; - stack ptr u8[32] s_out; - stack u64 s_in s_ilen s_r8; - reg u64 ilen r8 t64; - reg u8 t8; - inline int i; - - s_out = out; - - state = __st0(state); - - r8 = SHA3_256_RATE; - ilen = inlen; - - while(ilen >= r8) - { - state, in, ilen = __add_full_block(state, in, ilen, r8); - - s_in = in; - s_ilen = ilen; - s_r8 = r8; - - state = _keccakf1600_scalar(state); - - in = s_in; - ilen = s_ilen; - r8 = s_r8; - } - - t8 = 0x06; - state = __add_final_block(state, in, ilen, t8, r8); - - state = _keccakf1600_scalar(state); - - out = s_out; - - for i=0 to 4 - { - t64 = state[i]; - out[u64 i] = t64; - } - - return out; -} - -inline -fn __isha3_512(reg ptr u8[64] out, reg u64 in, inline int inlen) -> stack u8[64] -{ - stack u64[25] state; - stack ptr u8[64] s_out; - stack u64 s_in s_ilen s_r8; - reg u64 ilen r8 t64; - reg u8 t8; - inline int i; - - s_out = out; - - state = __st0(state); - - r8 = SHA3_512_RATE; - ilen = inlen; - - while(ilen >= r8) - { - state, in, ilen = __add_full_block(state, in, ilen, r8); - - s_in = in; - s_ilen = ilen; - s_r8 = r8; - - state = _keccakf1600_scalar(state); - - in = s_in; - ilen = s_ilen; - r8 = s_r8; - } - - t8 = 0x06; - state = __add_final_block(state, in, ilen, t8, r8); - - state = _keccakf1600_scalar(state); - - out = s_out; - - for i=0 to 8 - { - t64 = state[i]; - out[u64 i] = t64; - } - - return out; -} - -#[returnaddress="stack"] -fn _shake256_64(reg u64 out outlen, reg const ptr u8[64] in) -{ - reg u64 t64 j; - reg u8 c; - stack u64[25] state; - stack u64 s_out s_outlen; - inline int i; - - s_out = out; - s_outlen = outlen; - - state = __st0(state); - - for i = 0 to 8 { - t64 = in[u64 i]; - state[u64 i] ^= t64; - } - - state[u8 64] ^= 0x1f; - state[u8 SHAKE256_RATE-1] ^= 0x80; - - state = _keccakf1600_scalar(state); - - outlen = s_outlen; - out = s_out; - - while(outlen > SHAKE256_RATE) - { - for i = 0 to SHAKE256_RATE/8 - { - t64 = state[u64 i]; - (u64)[out + 8*i] = t64; - } - - out += SHAKE256_RATE; - outlen -= SHAKE256_RATE; - - s_out = out; - s_outlen = outlen; - - state = _keccakf1600_scalar(state); - - outlen = s_outlen; - out = s_out; - } - - s_outlen = outlen; - outlen >>= 3; - j = 0; - while(j < outlen) - { - t64 = state[(int) j]; - (u64)[out + 8 * j] = t64; - j = j + 1; - } - - j <<= 3; - outlen = s_outlen; - - while (j < outlen) - { - c = state[u8 (int) j]; - (u8)[out + j] = c; - j = j + 1; - } -} - -#[returnaddress="stack"] -fn _shake256_128_33(reg ptr u8[128] out, reg const ptr u8[33] in) -> stack u8[128] -{ - stack u64[25] state; - reg u64 t64; - reg u8 c; - inline int i; - - stack ptr u8[128] sout; - - sout = out; - - state = __st0(state); - - for i = 0 to 4 { - t64 = in[u64 i]; - state[u64 i] ^= t64; - } - - c = in[32]; - state[u8 32] ^= c; - state[u8 33] ^= 0x1f; - state[u8 SHAKE256_RATE-1] ^= 0x80; - - state = _keccakf1600_scalar(state); - - out = sout; - - for i = 0 to 16 { - t64 = state[u64 i]; - out[u64 i] = t64; - } - - return out; -} - -#[returnaddress="stack"] -fn _isha3_256_32(reg ptr u8[32] out, reg ptr u8[KYBER_SYMBYTES] in) -> reg ptr u8[32] -{ - stack u64[25] state; - stack ptr u8[32] s_out; - reg u64 t64; - inline int i; - - s_out = out; - - state = __st0(state); - - for i=0 to KYBER_SYMBYTES/8 - { - t64 = in[u64 i]; - state[u64 i] = t64; - } - - state[u8 KYBER_SYMBYTES] ^= 0x06; - state[u8 SHA3_256_RATE - 1] = 0x80; - - state = _keccakf1600_scalar(state); - - out = s_out; - - for i=0 to 4 - { - t64 = state[i]; - out[u64 i] = t64; - } - - return out; -} - -#[returnaddress="stack"] -fn _sha3_512_64(reg ptr u8[64] out, reg const ptr u8[64] in) -> stack u8[64] -{ - stack u64[25] state; - stack ptr u8[64] out_s; - reg u64 t64; - inline int i; - - state = __st0(state); - - for i = 0 to 8 - { - t64 = in[u64 i]; - state[i] ^= t64; - } - - state[u8 64] ^= 0x06; - state[u8 SHA3_512_RATE - 1] ^= 0x80; - - out_s = out; - - state = _keccakf1600_scalar(state); - - out = out_s; - - for i = 0 to 8 - { - t64 = state[i]; - out[u64 i] = t64; - } - - return out; -} - -#[returnaddress="stack"] -fn _sha3_512_32(reg ptr u8[64] out, reg const ptr u8[32] in) -> stack u8[64] -{ - stack u64[25] state; - stack ptr u8[64] out_s; - reg u64 t64; - inline int i; - - state = __st0(state); - - for i = 0 to 4 - { - t64 = in[u64 i]; - state[i] ^= t64; - } - - state[u8 32] ^= 0x06; - state[u8 SHA3_512_RATE-1] ^= 0x80; - - out_s = out; - - state = _keccakf1600_scalar(state); - - out = out_s; - - for i = 0 to 8 { - t64 = state[i]; - out[u64 i] = t64; - } - - return out; -} - -fn _shake128_absorb34(reg ptr u64[25] state, reg const ptr u8[34] in) -> reg ptr u64[25] -{ - reg u64 t64; - reg u16 t16; - inline int i; - - state = __st0(state); - - for i = 0 to 4 - { - t64 = in[u64 i]; - state[u64 i] ^= t64; - } - - t16 = in.[u16 32]; - state[u16 16] ^= t16; - - state[u8 34] ^= 0x1f; - - state[u8 SHAKE128_RATE-1] ^= 0x80; - - return state; -} - -#[returnaddress="stack"] -fn _shake128_squeezeblock(reg ptr u64[25] state, reg ptr u8[SHAKE128_RATE] out) -> reg ptr u64[25], reg ptr u8[SHAKE128_RATE] -{ - stack ptr u8[SHAKE128_RATE] out_s; - reg u64 t; - inline int i; - - out_s = out; - state = _keccakf1600_scalar(state); - out = out_s; - - for i = 0 to SHAKE128_RATE/8 - { - t = state[i]; - out[u64 i] = t; - } - return state, out; -} diff --git a/code/jasmin/avx2v/fips202_4x.jinc b/code/jasmin/avx2v/fips202_4x.jinc deleted file mode 100644 index a1409887..00000000 --- a/code/jasmin/avx2v/fips202_4x.jinc +++ /dev/null @@ -1,1434 +0,0 @@ -require "fips202_common.jinc" - -u256 rho56 = 0x181F1E1D1C1B1A191017161514131211080F0E0D0C0B0A090007060504030201; -u256 rho8 = 0x1E1D1C1B1A19181F16151413121110170E0D0C0B0A09080F0605040302010007; - -inline fn __rol_4u64_rho56(reg u256 a) -> reg u256 -{ - reg u256 r; - - r = #VPSHUFB_256(a, rho56); - - return r; -} - - -inline fn __rol_4u64_rho8(reg u256 a) -> reg u256 -{ - reg u256 r; - - r = #VPSHUFB_256(a, rho8); - - return r; -} - - -inline fn __rol_4u64(reg u256 a, inline int o) -> reg u256 -{ - reg u256 r; - reg u256 t256; - - r = #VPSLL_4u64(a, o); - t256 = #VPSRL_4u64(a, 64 - o); - - r |= t256; - - return r; -} - - -param int ba=0; -param int be=1; -param int bi=2; -param int bo=3; -param int bu=4; -param int ga=5; -param int ge=6; -param int gi=7; -param int go=8; -param int gu=9; -param int ka=10; -param int ke=11; -param int ki=12; -param int ko=13; -param int ku=14; -param int ma=15; -param int me=16; -param int mi=17; -param int mo=18; -param int mu=19; -param int sa=20; -param int se=21; -param int si=22; -param int so=23; -param int su=24; - -u256[24] KeccakF1600RoundConstants = { - 0x0000000000000001000000000000000100000000000000010000000000000001, - 0x0000000000008082000000000000808200000000000080820000000000008082, - 0x800000000000808a800000000000808a800000000000808a800000000000808a, - 0x8000000080008000800000008000800080000000800080008000000080008000, - 0x000000000000808b000000000000808b000000000000808b000000000000808b, - 0x0000000080000001000000008000000100000000800000010000000080000001, - 0x8000000080008081800000008000808180000000800080818000000080008081, - 0x8000000000008009800000000000800980000000000080098000000000008009, - 0x000000000000008a000000000000008a000000000000008a000000000000008a, - 0x0000000000000088000000000000008800000000000000880000000000000088, - 0x0000000080008009000000008000800900000000800080090000000080008009, - 0x000000008000000a000000008000000a000000008000000a000000008000000a, - 0x000000008000808b000000008000808b000000008000808b000000008000808b, - 0x800000000000008b800000000000008b800000000000008b800000000000008b, - 0x8000000000008089800000000000808980000000000080898000000000008089, - 0x8000000000008003800000000000800380000000000080038000000000008003, - 0x8000000000008002800000000000800280000000000080028000000000008002, - 0x8000000000000080800000000000008080000000000000808000000000000080, - 0x000000000000800a000000000000800a000000000000800a000000000000800a, - 0x800000008000000a800000008000000a800000008000000a800000008000000a, - 0x8000000080008081800000008000808180000000800080818000000080008081, - 0x8000000000008080800000000000808080000000000080808000000000008080, - 0x0000000080000001000000008000000100000000800000010000000080000001, - 0x8000000080008008800000008000800880000000800080088000000080008008 - }; - -inline fn __prepare_theta(reg ptr u256[25] A_4x) -> reg u256, reg u256, reg u256, reg u256, reg u256 -{ - reg u256 Ca, Ce, Ci, Co, Cu; - - // Ca = XOR256(Aba, XOR256(Aga, XOR256(Aka, XOR256(Ama, Asa)))); - Ca = A_4x[sa]; - Ca ^= A_4x[ma]; - Ca ^= A_4x[ka]; - Ca ^= A_4x[ga]; - Ca ^= A_4x[ba]; - - // Ce = XOR256(Abe, XOR256(Age, XOR256(Ake, XOR256(Ame, Ase)))); - Ce = A_4x[se]; - Ce ^= A_4x[me]; - Ce ^= A_4x[ke]; - Ce ^= A_4x[ge]; - Ce ^= A_4x[be]; - - // Ci = XOR256(Abi, XOR256(Agi, XOR256(Aki, XOR256(Ami, Asi)))); - Ci = A_4x[si]; - Ci ^= A_4x[mi]; - Ci ^= A_4x[ki]; - Ci ^= A_4x[gi]; - Ci ^= A_4x[bi]; - - // Co = XOR256(Abo, XOR256(Ago, XOR256(Ako, XOR256(Amo, Aso)))); - Co = A_4x[so]; - Co ^= A_4x[mo]; - Co ^= A_4x[ko]; - Co ^= A_4x[go]; - Co ^= A_4x[bo]; - - // Cu = XOR256(Abu, XOR256(Agu, XOR256(Aku, XOR256(Amu, Asu)))); - Cu = A_4x[su]; - Cu ^= A_4x[mu]; - Cu ^= A_4x[ku]; - Cu ^= A_4x[gu]; - Cu ^= A_4x[bu]; - - return Ca, Ce, Ci, Co, Cu; -} - -inline fn __first(reg u256 Ca, reg u256 Ce, reg u256 Ci, reg u256 Co, reg u256 Cu) -> reg u256, reg u256, reg u256, reg u256, reg u256 -{ - reg u256 Da, De, Di, Do, Du; - reg u256 Ca1, Ce1, Ci1, Co1, Cu1; - - Ce1 = __rol_4u64(Ce, 1); - Da = Cu ^ Ce1; - - Ci1 = __rol_4u64(Ci, 1); - De = Ca ^ Ci1; - - Co1 = __rol_4u64(Co, 1); - Di = Ce ^ Co1; - - Cu1 = __rol_4u64(Cu, 1); - Do = Ci ^ Cu1; - - Ca1 = __rol_4u64(Ca, 1); - Du = Co ^ Ca1; - - return Da, De, Di, Do, Du; -} - - -inline fn __second_even( -reg ptr u256[25] A_4x, reg ptr u256[25] E_4x, inline int index, -reg u256 Ca, reg u256 Ce, reg u256 Ci, reg u256 Co, reg u256 Cu, -reg u256 Da, reg u256 De, reg u256 Di, reg u256 Do, reg u256 Du) --> reg ptr u256[25], reg ptr u256[25], reg u256, reg u256, reg u256, reg u256, reg u256 -{ - reg u256 Bba, Bbe, Bbi, Bbo, Bbu; - reg u256 t256; - - t256 = A_4x[ba]; - t256 ^= Da; - A_4x[ba] = t256; - Bba = t256; - - t256 = A_4x[ge]; - t256 ^= De; - A_4x[ge] = t256; - Bbe = __rol_4u64(t256, 44); - - t256 = A_4x[ki]; - t256 ^= Di; - A_4x[ki] = t256; - Bbi = __rol_4u64(t256, 43); - - // E##ba = XOR256(Bba, ANDnu256(Bbe, Bbi)); XOReq256(E##ba, CONST256_64(KeccakF1600RoundConstants[i])); - t256 = #VPANDN_256(Bbe, Bbi); - t256 ^= Bba; - t256 ^= KeccakF1600RoundConstants[index]; - E_4x[ba] = t256; - - Ca = t256; - - t256 = A_4x[mo]; - t256 ^= Do; - A_4x[mo] = t256; - Bbo = __rol_4u64(t256, 21); - - // E##be = XOR256(Bbe, ANDnu256(Bbi, Bbo)); - t256 = #VPANDN_256(Bbi, Bbo); - t256 ^= Bbe; - E_4x[be] = t256; - - Ce = t256; - - t256 = A_4x[su]; - t256 ^= Du; - A_4x[su] = t256; - Bbu = __rol_4u64(t256, 14); - - // E##bi = XOR256(Bbi, ANDnu256(Bbo, Bbu)); - t256 = #VPANDN_256(Bbo, Bbu); - t256 ^= Bbi; - E_4x[bi] = t256; - - Ci = t256; - - // E##bo = XOR256(Bbo, ANDnu256(Bbu, Bba)); - t256 = #VPANDN_256(Bbu, Bba); - t256 ^= Bbo; - E_4x[bo] = t256; - - Co = t256; - - // E##bu = XOR256(Bbu, ANDnu256(Bba, Bbe)); - t256 = #VPANDN_256(Bba, Bbe); - t256 ^= Bbu; - E_4x[bu] = t256; - - Cu = t256; - - return A_4x, E_4x, Ca, Ce, Ci, Co, Cu; -} - -inline fn __third_even( -reg ptr u256[25] A_4x, reg ptr u256[25] E_4x, -reg u256 Ca, reg u256 Ce, reg u256 Ci, reg u256 Co, reg u256 Cu, -reg u256 Da, reg u256 De, reg u256 Di, reg u256 Do, reg u256 Du) --> reg ptr u256[25], reg ptr u256[25], reg u256, reg u256, reg u256, reg u256, reg u256 -{ - reg u256 Bga, Bge, Bgi, Bgo, Bgu; - reg u256 t256; - - t256 = A_4x[bo]; - t256 ^= Do; - A_4x[bo] = t256; - Bga = __rol_4u64(t256, 28); - - t256 = A_4x[gu]; - t256 ^= Du; - A_4x[gu] = t256; - Bge = __rol_4u64(t256, 20); - - t256 = A_4x[ka]; - t256 ^= Da; - A_4x[ka] = t256; - Bgi = __rol_4u64(t256, 3); - - // E##ga = XOR256(Bga, ANDnu256(Bge, Bgi)) - t256 = #VPANDN_256(Bge, Bgi); - t256 ^= Bga; - E_4x[ga] = t256; - - Ca ^= t256; - - t256 = A_4x[me]; - t256 ^= De; - A_4x[me] = t256; - Bgo = __rol_4u64(t256, 45); - - // E##ge = XOR256(Bge, ANDnu256(Bgi, Bgo)) - t256 = #VPANDN_256(Bgi, Bgo); - t256 ^= Bge; - E_4x[ge] = t256; - - Ce ^= t256; - - t256 = A_4x[si]; - t256 ^= Di; - A_4x[si] = t256; - Bgu = __rol_4u64(t256, 61); - - // E##gi = XOR256(Bgi, ANDnu256(Bgo, Bgu)) - t256 = #VPANDN_256(Bgo, Bgu); - t256 ^= Bgi; - E_4x[gi] = t256; - - Ci ^= t256; - - // E##go = XOR256(Bgo, ANDnu256(Bgu, Bga)); - t256 = #VPANDN_256(Bgu, Bga); - t256 ^= Bgo; - E_4x[go] = t256; - - Co ^= t256; - - // E##gu = XOR256(Bgu, ANDnu256(Bga, Bge)); - t256 = #VPANDN_256(Bga, Bge); - t256 ^= Bgu; - E_4x[gu] = t256; - - Cu ^= t256; - - return A_4x, E_4x, Ca, Ce, Ci, Co, Cu; -} - -inline fn __fourth_even( -reg ptr u256[25] A_4x, reg ptr u256[25] E_4x, -reg u256 Ca, reg u256 Ce, reg u256 Ci, reg u256 Co, reg u256 Cu, -reg u256 Da, reg u256 De, reg u256 Di, reg u256 Do, reg u256 Du) --> reg ptr u256[25], reg ptr u256[25], reg u256, reg u256, reg u256, reg u256, reg u256 -{ - reg u256 Bka, Bke, Bki, Bko, Bku; - reg u256 t256; - - t256 = A_4x[be]; - t256 ^= De; - A_4x[be] = t256; - Bka = __rol_4u64(t256, 1); - - t256 = A_4x[gi]; - t256 ^= Di; - A_4x[gi] = t256; - Bke = __rol_4u64(t256, 6); - - t256 = A_4x[ko]; - t256 ^= Do; - A_4x[ko] = t256; - Bki = __rol_4u64(t256, 25); - - // E##ka = XOR256(Bka, ANDnu256(Bke, Bki)); - t256 = #VPANDN_256(Bke, Bki); - t256 ^= Bka; - E_4x[ka] = t256; - - Ca ^= t256; - - t256 = A_4x[mu]; - t256 ^= Du; - A_4x[mu] = t256; - Bko = __rol_4u64_rho8(t256); - - // E##ke = XOR256(Bke, ANDnu256(Bki, Bko)); - t256 = #VPANDN_256(Bki, Bko); - t256 ^= Bke; - E_4x[ke] = t256; - - Ce ^= t256; - - t256 = A_4x[sa]; - t256 ^= Da; - A_4x[sa] = t256; - Bku = __rol_4u64(t256, 18); - - // E##ki = XOR256(Bki, ANDnu256(Bko, Bku)) - t256 = #VPANDN_256(Bko, Bku); - t256 ^= Bki; - E_4x[ki] = t256; - - Ci ^= t256; - - // E##ko = XOR256(Bko, ANDnu256(Bku, Bka)); - t256 = #VPANDN_256(Bku, Bka); - t256 ^= Bko; - E_4x[ko] = t256; - - Co ^= t256; - - // E##ku = XOR256(Bku, ANDnu256(Bka, Bke)); - t256 = #VPANDN_256(Bka, Bke); - t256 ^= Bku; - E_4x[ku] = t256; - - Cu ^= t256; - - return A_4x, E_4x, Ca, Ce, Ci, Co, Cu; -} - -inline fn __fifth_even( -reg ptr u256[25] A_4x, reg ptr u256[25] E_4x, -reg u256 Ca, reg u256 Ce, reg u256 Ci, reg u256 Co, reg u256 Cu, -reg u256 Da, reg u256 De, reg u256 Di, reg u256 Do, reg u256 Du) --> reg ptr u256[25], reg ptr u256[25], reg u256, reg u256, reg u256, reg u256, reg u256 -{ - reg u256 Bma, Bme, Bmi, Bmo, Bmu; - reg u256 t256; - - t256 = A_4x[bu]; - t256 ^= Du; - A_4x[bu] = t256; - Bma = __rol_4u64(t256, 27); - - t256 = A_4x[ga]; - t256 ^= Da; - A_4x[ga] = t256; - Bme = __rol_4u64(t256, 36); - - t256 = A_4x[ke]; - t256 ^= De; - A_4x[ke] = t256; - Bmi = __rol_4u64(t256, 10); - - // E##ma = XOR256(Bma, ANDnu256(Bme, Bmi)); - t256 = #VPANDN_256(Bme, Bmi); - t256 ^= Bma; - E_4x[ma] = t256; - - Ca ^= t256; - - t256 = A_4x[mi]; - t256 ^= Di; - A_4x[mi] = t256; - Bmo = __rol_4u64(t256, 15); - - // E##me = XOR256(Bme, ANDnu256(Bmi, Bmo)); - t256 = #VPANDN_256(Bmi, Bmo); - t256 ^= Bme; - E_4x[me] = t256; - - Ce ^= t256; - - t256 = A_4x[so]; - t256 ^= Do; - A_4x[so] = t256; - Bmu = __rol_4u64_rho56(t256); - - // E##mi = XOR256(Bmi, ANDnu256(Bmo, Bmu)); - t256 = #VPANDN_256(Bmo, Bmu); - t256 ^= Bmi; - E_4x[mi] = t256; - - Ci ^= t256; - - // E##mo = XOR256(Bmo, ANDnu256(Bmu, Bma)); - t256 = #VPANDN_256(Bmu, Bma); - t256 ^= Bmo; - E_4x[mo] = t256; - - Co ^= t256; - - // E##mu = XOR256(Bmu, ANDnu256(Bma, Bme)); - t256 = #VPANDN_256(Bma, Bme); - t256 ^= Bmu; - E_4x[mu] = t256; - - Cu ^= t256; - - return A_4x, E_4x, Ca, Ce, Ci, Co, Cu; -} - -inline fn __sixth_even( -reg ptr u256[25] A_4x, reg ptr u256[25] E_4x, -reg u256 Ca, reg u256 Ce, reg u256 Ci, reg u256 Co, reg u256 Cu, -reg u256 Da, reg u256 De, reg u256 Di, reg u256 Do, reg u256 Du) --> reg ptr u256[25], reg ptr u256[25], reg u256, reg u256, reg u256, reg u256, reg u256 -{ - reg u256 Bsa, Bse, Bsi, Bso, Bsu; - reg u256 t256; - - t256 = A_4x[bi]; - t256 ^= Di; - A_4x[bi] = t256; - Bsa = __rol_4u64(t256, 62); - - t256 = A_4x[go]; - t256 ^= Do; - A_4x[go] = t256; - Bse = __rol_4u64(t256, 55); - - t256 = A_4x[ku]; - t256 ^= Du; - A_4x[ku] = t256; - Bsi = __rol_4u64(t256, 39); - - // E##sa = XOR256(Bsa, ANDnu256(Bse, Bsi)); - t256 = #VPANDN_256(Bse, Bsi); - t256 ^= Bsa; - E_4x[sa] = t256; - - Ca ^= t256; - - t256 = A_4x[ma]; - t256 ^= Da; - A_4x[ma] = t256; - Bso = __rol_4u64(t256, 41); - - // E##se = XOR256(Bse, ANDnu256(Bsi, Bso)) - t256 = #VPANDN_256(Bsi, Bso); - t256 ^= Bse; - E_4x[se] = t256; - - Ce ^= t256; - - t256 = A_4x[se]; - t256 ^= De; - A_4x[se] = t256; - Bsu = __rol_4u64(t256, 2); - - // E##si = XOR256(Bsi, ANDnu256(Bso, Bsu)); - t256 = #VPANDN_256(Bso, Bsu); - t256 ^= Bsi; - E_4x[si] = t256; - - Ci ^= t256; - - // E##so = XOR256(Bso, ANDnu256(Bsu, Bsa)); - t256 = #VPANDN_256(Bsu, Bsa); - t256 ^= Bso; - E_4x[so] = t256; - - Co ^= t256; - - // E##su = XOR256(Bsu, ANDnu256(Bsa, Bse)); - t256 = #VPANDN_256(Bsa, Bse); - t256 ^= Bsu; - E_4x[su] = t256; - - Cu ^= t256; - - return A_4x, E_4x, Ca, Ce, Ci, Co, Cu; -} - -inline fn __second_odd( -reg ptr u256[25] A_4x, reg ptr u256[25] E_4x, inline int index, -reg u256 Ca, reg u256 Ce, reg u256 Ci, reg u256 Co, reg u256 Cu, -reg u256 Da, reg u256 De, reg u256 Di, reg u256 Do, reg u256 Du) --> reg ptr u256[25], reg ptr u256[25], reg u256, reg u256, reg u256, reg u256, reg u256 -{ - reg u256 Bba, Bbe, Bbi, Bbo, Bbu; - reg u256 t256; - - t256 = A_4x[ba]; - t256 ^= Da; - A_4x[ba] = t256; - Bba = t256; - - t256 = A_4x[ge]; - t256 ^= De; - A_4x[ge] = t256; - Bbe = __rol_4u64(t256, 44); - - t256 = A_4x[ki]; - t256 ^= Di; - A_4x[ki] = t256; - Bbi = __rol_4u64(t256, 43); - - // E##ba = XOR256(Bba, ANDnu256(Bbe, Bbi)); XOReq256(E##ba, CONST256_64(KeccakF1600RoundConstants[i])); - t256 = #VPANDN_256(Bbe, Bbi); - t256 ^= Bba; - t256 ^= KeccakF1600RoundConstants[index]; - E_4x[ba] = t256; - - Ca = t256; - - t256 = A_4x[mo]; - t256 ^= Do; - A_4x[mo] = t256; - Bbo = __rol_4u64(t256, 21); - - // E##be = XOR256(Bbe, ANDnu256(Bbi, Bbo)); - t256 = #VPANDN_256(Bbi, Bbo); - t256 ^= Bbe; - E_4x[be] = t256; - - Ce = t256; - - t256 = A_4x[su]; - t256 ^= Du; - A_4x[su] = t256; - Bbu = __rol_4u64(t256, 14); - - // E##bi = XOR256(Bbi, ANDnu256(Bbo, Bbu)); - t256 = #VPANDN_256(Bbo, Bbu); - t256 ^= Bbi; - E_4x[bi] = t256; - - Ci = t256; - - // E##bo = XOR256(Bbo, ANDnu256(Bbu, Bba)); - t256 = #VPANDN_256(Bbu, Bba); - t256 ^= Bbo; - E_4x[bo] = t256; - - Co = t256; - - // E##bu = XOR256(Bbu, ANDnu256(Bba, Bbe)); - t256 = #VPANDN_256(Bba, Bbe); - t256 ^= Bbu; - E_4x[bu] = t256; - - Cu = t256; - - return A_4x, E_4x, Ca, Ce, Ci, Co, Cu; -} - -inline fn __third_odd( -reg ptr u256[25] A_4x, reg ptr u256[25] E_4x, -reg u256 Ca, reg u256 Ce, reg u256 Ci, reg u256 Co, reg u256 Cu, -reg u256 Da, reg u256 De, reg u256 Di, reg u256 Do, reg u256 Du) --> reg ptr u256[25], reg ptr u256[25], reg u256, reg u256, reg u256, reg u256, reg u256 -{ - reg u256 Bga, Bge, Bgi, Bgo, Bgu; - reg u256 t256; - - t256 = A_4x[bo]; - t256 ^= Do; - A_4x[bo] = t256; - Bga = __rol_4u64(t256, 28); - - t256 = A_4x[gu]; - t256 ^= Du; - A_4x[gu] = t256; - Bge = __rol_4u64(t256, 20); - - t256 = A_4x[ka]; - t256 ^= Da; - A_4x[ka] = t256; - Bgi = __rol_4u64(t256, 3); - - // E##ga = XOR256(Bga, ANDnu256(Bge, Bgi)) - t256 = #VPANDN_256(Bge, Bgi); - t256 ^= Bga; - E_4x[ga] = t256; - - Ca ^= t256; - - t256 = A_4x[me]; - t256 ^= De; - A_4x[me] = t256; - Bgo = __rol_4u64(t256, 45); - - // E##ge = XOR256(Bge, ANDnu256(Bgi, Bgo)) - t256 = #VPANDN_256(Bgi, Bgo); - t256 ^= Bge; - E_4x[ge] = t256; - - Ce ^= t256; - - t256 = A_4x[si]; - t256 ^= Di; - A_4x[si] = t256; - Bgu = __rol_4u64(t256, 61); - - // E##gi = XOR256(Bgi, ANDnu256(Bgo, Bgu)) - t256 = #VPANDN_256(Bgo, Bgu); - t256 ^= Bgi; - E_4x[gi] = t256; - - Ci ^= t256; - - // E##go = XOR256(Bgo, ANDnu256(Bgu, Bga)); - t256 = #VPANDN_256(Bgu, Bga); - t256 ^= Bgo; - E_4x[go] = t256; - - Co ^= t256; - - // E##gu = XOR256(Bgu, ANDnu256(Bga, Bge)); - t256 = #VPANDN_256(Bga, Bge); - t256 ^= Bgu; - E_4x[gu] = t256; - - Cu ^= t256; - - return A_4x, E_4x, Ca, Ce, Ci, Co, Cu; -} - -inline fn __fourth_odd( -reg ptr u256[25] A_4x, reg ptr u256[25] E_4x, -reg u256 Ca, reg u256 Ce, reg u256 Ci, reg u256 Co, reg u256 Cu, -reg u256 Da, reg u256 De, reg u256 Di, reg u256 Do, reg u256 Du) --> reg ptr u256[25], reg ptr u256[25], reg u256, reg u256, reg u256, reg u256, reg u256 -{ - reg u256 Bka, Bke, Bki, Bko, Bku; - reg u256 t256; - - t256 = A_4x[be]; - t256 ^= De; - A_4x[be] = t256; - Bka = __rol_4u64(t256, 1); - - t256 = A_4x[gi]; - t256 ^= Di; - A_4x[gi] = t256; - Bke = __rol_4u64(t256, 6); - - t256 = A_4x[ko]; - t256 ^= Do; - A_4x[ko] = t256; - Bki = __rol_4u64(t256, 25); - - // E##ka = XOR256(Bka, ANDnu256(Bke, Bki)); - t256 = #VPANDN_256(Bke, Bki); - t256 ^= Bka; - E_4x[ka] = t256; - - Ca ^= t256; - - t256 = A_4x[mu]; - t256 ^= Du; - A_4x[mu] = t256; - Bko = __rol_4u64_rho8(t256); - - // E##ke = XOR256(Bke, ANDnu256(Bki, Bko)); - t256 = #VPANDN_256(Bki, Bko); - t256 ^= Bke; - E_4x[ke] = t256; - - Ce ^= t256; - - t256 = A_4x[sa]; - t256 ^= Da; - A_4x[sa] = t256; - Bku = __rol_4u64(t256, 18); - - // E##ki = XOR256(Bki, ANDnu256(Bko, Bku)) - t256 = #VPANDN_256(Bko, Bku); - t256 ^= Bki; - E_4x[ki] = t256; - - Ci ^= t256; - - // E##ko = XOR256(Bko, ANDnu256(Bku, Bka)); - t256 = #VPANDN_256(Bku, Bka); - t256 ^= Bko; - E_4x[ko] = t256; - - Co ^= t256; - - // E##ku = XOR256(Bku, ANDnu256(Bka, Bke)); - t256 = #VPANDN_256(Bka, Bke); - t256 ^= Bku; - E_4x[ku] = t256; - - Cu ^= t256; - - return A_4x, E_4x, Ca, Ce, Ci, Co, Cu; -} - -inline fn __fifth_odd( -reg ptr u256[25] A_4x, reg ptr u256[25] E_4x, -reg u256 Ca, reg u256 Ce, reg u256 Ci, reg u256 Co, reg u256 Cu, -reg u256 Da, reg u256 De, reg u256 Di, reg u256 Do, reg u256 Du) --> reg ptr u256[25], reg ptr u256[25], reg u256, reg u256, reg u256, reg u256, reg u256 -{ - reg u256 Bma, Bme, Bmi, Bmo, Bmu; - reg u256 t256; - - t256 = A_4x[bu]; - t256 ^= Du; - A_4x[bu] = t256; - Bma = __rol_4u64(t256, 27); - - t256 = A_4x[ga]; - t256 ^= Da; - A_4x[ga] = t256; - Bme = __rol_4u64(t256, 36); - - t256 = A_4x[ke]; - t256 ^= De; - A_4x[ke] = t256; - Bmi = __rol_4u64(t256, 10); - - // E##ma = XOR256(Bma, ANDnu256(Bme, Bmi)); - t256 = #VPANDN_256(Bme, Bmi); - t256 ^= Bma; - E_4x[ma] = t256; - - Ca ^= t256; - - t256 = A_4x[mi]; - t256 ^= Di; - A_4x[mi] = t256; - Bmo = __rol_4u64(t256, 15); - - // E##me = XOR256(Bme, ANDnu256(Bmi, Bmo)); - t256 = #VPANDN_256(Bmi, Bmo); - t256 ^= Bme; - E_4x[me] = t256; - - Ce ^= t256; - - t256 = A_4x[so]; - t256 ^= Do; - A_4x[so] = t256; - Bmu = __rol_4u64_rho56(t256); - - // E##mi = XOR256(Bmi, ANDnu256(Bmo, Bmu)); - t256 = #VPANDN_256(Bmo, Bmu); - t256 ^= Bmi; - E_4x[mi] = t256; - - Ci ^= t256; - - // E##mo = XOR256(Bmo, ANDnu256(Bmu, Bma)); - t256 = #VPANDN_256(Bmu, Bma); - t256 ^= Bmo; - E_4x[mo] = t256; - - Co ^= t256; - - // E##mu = XOR256(Bmu, ANDnu256(Bma, Bme)); - t256 = #VPANDN_256(Bma, Bme); - t256 ^= Bmu; - E_4x[mu] = t256; - - Cu ^= t256; - - return A_4x, E_4x, Ca, Ce, Ci, Co, Cu; -} - -inline fn __sixth_odd( -reg ptr u256[25] A_4x, reg ptr u256[25] E_4x, -reg u256 Ca, reg u256 Ce, reg u256 Ci, reg u256 Co, reg u256 Cu, -reg u256 Da, reg u256 De, reg u256 Di, reg u256 Do, reg u256 Du) --> reg ptr u256[25], reg ptr u256[25], reg u256, reg u256, reg u256, reg u256, reg u256 -{ - reg u256 Bsa, Bse, Bsi, Bso, Bsu; - reg u256 t256; - - t256 = A_4x[bi]; - t256 ^= Di; - A_4x[bi] = t256; - Bsa = __rol_4u64(t256, 62); - - t256 = A_4x[go]; - t256 ^= Do; - A_4x[go] = t256; - Bse = __rol_4u64(t256, 55); - - t256 = A_4x[ku]; - t256 ^= Du; - A_4x[ku] = t256; - Bsi = __rol_4u64(t256, 39); - - // E##sa = XOR256(Bsa, ANDnu256(Bse, Bsi)); - t256 = #VPANDN_256(Bse, Bsi); - t256 ^= Bsa; - E_4x[sa] = t256; - - Ca ^= t256; - - t256 = A_4x[ma]; - t256 ^= Da; - A_4x[ma] = t256; - Bso = __rol_4u64(t256, 41); - - // E##se = XOR256(Bse, ANDnu256(Bsi, Bso)) - t256 = #VPANDN_256(Bsi, Bso); - t256 ^= Bse; - E_4x[se] = t256; - - Ce ^= t256; - - t256 = A_4x[se]; - t256 ^= De; - A_4x[se] = t256; - Bsu = __rol_4u64(t256, 2); - - // E##si = XOR256(Bsi, ANDnu256(Bso, Bsu)); - t256 = #VPANDN_256(Bso, Bsu); - t256 ^= Bsi; - E_4x[si] = t256; - - Ci ^= t256; - - // E##so = XOR256(Bso, ANDnu256(Bsu, Bsa)); - t256 = #VPANDN_256(Bsu, Bsa); - t256 ^= Bso; - E_4x[so] = t256; - - Co ^= t256; - - // E##su = XOR256(Bsu, ANDnu256(Bsa, Bse)); - t256 = #VPANDN_256(Bsa, Bse); - t256 ^= Bsu; - E_4x[su] = t256; - - Cu ^= t256; - - return A_4x, E_4x, Ca, Ce, Ci, Co, Cu; -} - -inline fn __second_last( -reg ptr u256[25] A_4x, reg ptr u256[25] E_4x, inline int index, -reg u256 Da, reg u256 De, reg u256 Di, reg u256 Do, reg u256 Du) --> reg ptr u256[25], reg ptr u256[25] -{ - reg u256 Bba, Bbe, Bbi, Bbo, Bbu; - reg u256 t256; - - t256 = A_4x[ba]; - t256 ^= Da; - A_4x[ba] = t256; - Bba = t256; - - t256 = A_4x[ge]; - t256 ^= De; - A_4x[ge] = t256; - Bbe = __rol_4u64(t256, 44); - - t256 = A_4x[ki]; - t256 ^= Di; - A_4x[ki] = t256; - Bbi = __rol_4u64(t256, 43); - - // E##ba = XOR256(Bba, ANDnu256(Bbe, Bbi)); XOReq256(E##ba, CONST256_64(KeccakF1600RoundConstants[i])); - t256 = #VPANDN_256(Bbe, Bbi); - t256 ^= Bba; - t256 ^= KeccakF1600RoundConstants[index]; - E_4x[ba] = t256; - - t256 = A_4x[mo]; - t256 ^= Do; - A_4x[mo] = t256; - Bbo = __rol_4u64(t256, 21); - - // E##be = XOR256(Bbe, ANDnu256(Bbi, Bbo)); - t256 = #VPANDN_256(Bbi, Bbo); - t256 ^= Bbe; - E_4x[be] = t256; - - t256 = A_4x[su]; - t256 ^= Du; - A_4x[su] = t256; - Bbu = __rol_4u64(t256, 14); - - // E##bi = XOR256(Bbi, ANDnu256(Bbo, Bbu)); - t256 = #VPANDN_256(Bbo, Bbu); - t256 ^= Bbi; - E_4x[bi] = t256; - - // E##bo = XOR256(Bbo, ANDnu256(Bbu, Bba)); - t256 = #VPANDN_256(Bbu, Bba); - t256 ^= Bbo; - E_4x[bo] = t256; - - // E##bu = XOR256(Bbu, ANDnu256(Bba, Bbe)); - t256 = #VPANDN_256(Bba, Bbe); - t256 ^= Bbu; - E_4x[bu] = t256; - - return A_4x, E_4x; -} - -inline fn __third_last( -reg ptr u256[25] A_4x, reg ptr u256[25] E_4x, -reg u256 Da, reg u256 De, reg u256 Di, reg u256 Do, reg u256 Du) --> reg ptr u256[25], reg ptr u256[25] -{ - reg u256 Bga, Bge, Bgi, Bgo, Bgu; - reg u256 t256; - - t256 = A_4x[bo]; - t256 ^= Do; - A_4x[bo] = t256; - Bga = __rol_4u64(t256, 28); - - t256 = A_4x[gu]; - t256 ^= Du; - A_4x[gu] = t256; - Bge = __rol_4u64(t256, 20); - - t256 = A_4x[ka]; - t256 ^= Da; - A_4x[ka] = t256; - Bgi = __rol_4u64(t256, 3); - - // E##ga = XOR256(Bga, ANDnu256(Bge, Bgi)) - t256 = #VPANDN_256(Bge, Bgi); - t256 ^= Bga; - E_4x[ga] = t256; - - t256 = A_4x[me]; - t256 ^= De; - A_4x[me] = t256; - Bgo = __rol_4u64(t256, 45); - - // E##ge = XOR256(Bge, ANDnu256(Bgi, Bgo)) - t256 = #VPANDN_256(Bgi, Bgo); - t256 ^= Bge; - E_4x[ge] = t256; - - t256 = A_4x[si]; - t256 ^= Di; - A_4x[si] = t256; - Bgu = __rol_4u64(t256, 61); - - // E##gi = XOR256(Bgi, ANDnu256(Bgo, Bgu)) - t256 = #VPANDN_256(Bgo, Bgu); - t256 ^= Bgi; - E_4x[gi] = t256; - - // E##go = XOR256(Bgo, ANDnu256(Bgu, Bga)); - t256 = #VPANDN_256(Bgu, Bga); - t256 ^= Bgo; - E_4x[go] = t256; - - // E##gu = XOR256(Bgu, ANDnu256(Bga, Bge)); - t256 = #VPANDN_256(Bga, Bge); - t256 ^= Bgu; - E_4x[gu] = t256; - - return A_4x, E_4x; -} - -inline fn __fourth_last( -reg ptr u256[25] A_4x, reg ptr u256[25] E_4x, -reg u256 Da, reg u256 De, reg u256 Di, reg u256 Do, reg u256 Du) --> reg ptr u256[25], reg ptr u256[25] -{ - reg u256 Bka, Bke, Bki, Bko, Bku; - reg u256 t256; - - t256 = A_4x[be]; - t256 ^= De; - A_4x[be] = t256; - Bka = __rol_4u64(t256, 1); - - t256 = A_4x[gi]; - t256 ^= Di; - A_4x[gi] = t256; - Bke = __rol_4u64(t256, 6); - - t256 = A_4x[ko]; - t256 ^= Do; - A_4x[ko] = t256; - Bki = __rol_4u64(t256, 25); - - // E##ka = XOR256(Bka, ANDnu256(Bke, Bki)); - t256 = #VPANDN_256(Bke, Bki); - t256 ^= Bka; - E_4x[ka] = t256; - - t256 = A_4x[mu]; - t256 ^= Du; - A_4x[mu] = t256; - Bko = __rol_4u64_rho8(t256); - - // E##ke = XOR256(Bke, ANDnu256(Bki, Bko)); - t256 = #VPANDN_256(Bki, Bko); - t256 ^= Bke; - E_4x[ke] = t256; - - t256 = A_4x[sa]; - t256 ^= Da; - A_4x[sa] = t256; - Bku = __rol_4u64(t256, 18); - - // E##ki = XOR256(Bki, ANDnu256(Bko, Bku)) - t256 = #VPANDN_256(Bko, Bku); - t256 ^= Bki; - E_4x[ki] = t256; - - // E##ko = XOR256(Bko, ANDnu256(Bku, Bka)); - t256 = #VPANDN_256(Bku, Bka); - t256 ^= Bko; - E_4x[ko] = t256; - - // E##ku = XOR256(Bku, ANDnu256(Bka, Bke)); - t256 = #VPANDN_256(Bka, Bke); - t256 ^= Bku; - E_4x[ku] = t256; - - return A_4x, E_4x; -} - -inline fn __fifth_last( -reg ptr u256[25] A_4x, reg ptr u256[25] E_4x, -reg u256 Da, reg u256 De, reg u256 Di, reg u256 Do, reg u256 Du) --> reg ptr u256[25], reg ptr u256[25] -{ - reg u256 Bma, Bme, Bmi, Bmo, Bmu; - reg u256 t256; - - t256 = A_4x[bu]; - t256 ^= Du; - A_4x[bu] = t256; - Bma = __rol_4u64(t256, 27); - - t256 = A_4x[ga]; - t256 ^= Da; - A_4x[ga] = t256; - Bme = __rol_4u64(t256, 36); - - t256 = A_4x[ke]; - t256 ^= De; - A_4x[ke] = t256; - Bmi = __rol_4u64(t256, 10); - - // E##ma = XOR256(Bma, ANDnu256(Bme, Bmi)); - t256 = #VPANDN_256(Bme, Bmi); - t256 ^= Bma; - E_4x[ma] = t256; - - t256 = A_4x[mi]; - t256 ^= Di; - A_4x[mi] = t256; - Bmo = __rol_4u64(t256, 15); - - // E##me = XOR256(Bme, ANDnu256(Bmi, Bmo)); - t256 = #VPANDN_256(Bmi, Bmo); - t256 ^= Bme; - E_4x[me] = t256; - - t256 = A_4x[so]; - t256 ^= Do; - A_4x[so] = t256; - Bmu = __rol_4u64_rho56(t256); - - // E##mi = XOR256(Bmi, ANDnu256(Bmo, Bmu)); - t256 = #VPANDN_256(Bmo, Bmu); - t256 ^= Bmi; - E_4x[mi] = t256; - - // E##mo = XOR256(Bmo, ANDnu256(Bmu, Bma)); - t256 = #VPANDN_256(Bmu, Bma); - t256 ^= Bmo; - E_4x[mo] = t256; - - // E##mu = XOR256(Bmu, ANDnu256(Bma, Bme)); - t256 = #VPANDN_256(Bma, Bme); - t256 ^= Bmu; - E_4x[mu] = t256; - - return A_4x, E_4x; -} - -inline fn __sixth_last( -reg ptr u256[25] A_4x, reg ptr u256[25] E_4x, -reg u256 Da, reg u256 De, reg u256 Di, reg u256 Do, reg u256 Du) --> reg ptr u256[25], reg ptr u256[25] -{ - reg u256 Bsa, Bse, Bsi, Bso, Bsu; - reg u256 t256; - - t256 = A_4x[bi]; - t256 ^= Di; - A_4x[bi] = t256; - Bsa = __rol_4u64(t256, 62); - - t256 = A_4x[go]; - t256 ^= Do; - A_4x[go] = t256; - Bse = __rol_4u64(t256, 55); - - t256 = A_4x[ku]; - t256 ^= Du; - A_4x[ku] = t256; - Bsi = __rol_4u64(t256, 39); - - // E##sa = XOR256(Bsa, ANDnu256(Bse, Bsi)); - t256 = #VPANDN_256(Bse, Bsi); - t256 ^= Bsa; - E_4x[sa] = t256; - - t256 = A_4x[ma]; - t256 ^= Da; - A_4x[ma] = t256; - Bso = __rol_4u64(t256, 41); - - // E##se = XOR256(Bse, ANDnu256(Bsi, Bso)) - t256 = #VPANDN_256(Bsi, Bso); - t256 ^= Bse; - E_4x[se] = t256; - - t256 = A_4x[se]; - t256 ^= De; - A_4x[se] = t256; - Bsu = __rol_4u64(t256, 2); - - // E##si = XOR256(Bsi, ANDnu256(Bso, Bsu)); - t256 = #VPANDN_256(Bso, Bsu); - t256 ^= Bsi; - E_4x[si] = t256; - - // E##so = XOR256(Bso, ANDnu256(Bsu, Bsa)); - t256 = #VPANDN_256(Bsu, Bsa); - t256 ^= Bso; - E_4x[so] = t256; - - // E##su = XOR256(Bsu, ANDnu256(Bsa, Bse)); - t256 = #VPANDN_256(Bsa, Bse); - t256 ^= Bsu; - E_4x[su] = t256; - - return A_4x, E_4x; -} - -inline fn __theta_rho_pi_chi_iota_prepare_theta_even( -reg ptr u256[25] A_4x, reg ptr u256[25] E_4x, inline int index, -reg u256 Ca, reg u256 Ce, reg u256 Ci, reg u256 Co, reg u256 Cu) --> reg ptr u256[25], reg ptr u256[25], reg u256, reg u256, reg u256, reg u256, reg u256 -{ - reg u256 Da, De, Di, Do, Du; - - Da, De, Di, Do, Du = __first(Ca, Ce, Ci, Co, Cu); - - A_4x, E_4x, Ca, Ce, Ci, Co, Cu = __second_even(A_4x, E_4x, index, Ca, Ce, Ci, Co, Cu, Da, De, Di, Do, Du); - - A_4x, E_4x, Ca, Ce, Ci, Co, Cu = __third_even(A_4x, E_4x, Ca, Ce, Ci, Co, Cu, Da, De, Di, Do, Du); - - A_4x, E_4x, Ca, Ce, Ci, Co, Cu = __fourth_even(A_4x, E_4x, Ca, Ce, Ci, Co, Cu, Da, De, Di, Do, Du); - - A_4x, E_4x, Ca, Ce, Ci, Co, Cu = __fifth_even(A_4x, E_4x, Ca, Ce, Ci, Co, Cu, Da, De, Di, Do, Du); - - A_4x, E_4x, Ca, Ce, Ci, Co, Cu = __sixth_even(A_4x, E_4x, Ca, Ce, Ci, Co, Cu, Da, De, Di, Do, Du); - - return A_4x, E_4x, Ca, Ce, Ci, Co, Cu; -} - -inline fn __theta_rho_pi_chi_iota_prepare_theta_odd( -reg ptr u256[25] A_4x, reg ptr u256[25] E_4x, inline int index, -reg u256 Ca, reg u256 Ce, reg u256 Ci, reg u256 Co, reg u256 Cu) --> reg ptr u256[25], reg ptr u256[25], reg u256, reg u256, reg u256, reg u256, reg u256 -{ - reg u256 Da, De, Di, Do, Du; - - Da, De, Di, Do, Du = __first(Ca, Ce, Ci, Co, Cu); - - A_4x, E_4x, Ca, Ce, Ci, Co, Cu = __second_odd(A_4x, E_4x, index, Ca, Ce, Ci, Co, Cu, Da, De, Di, Do, Du); - - A_4x, E_4x, Ca, Ce, Ci, Co, Cu = __third_odd(A_4x, E_4x, Ca, Ce, Ci, Co, Cu, Da, De, Di, Do, Du); - - A_4x, E_4x, Ca, Ce, Ci, Co, Cu = __fourth_odd(A_4x, E_4x, Ca, Ce, Ci, Co, Cu, Da, De, Di, Do, Du); - - A_4x, E_4x, Ca, Ce, Ci, Co, Cu = __fifth_odd(A_4x, E_4x, Ca, Ce, Ci, Co, Cu, Da, De, Di, Do, Du); - - A_4x, E_4x, Ca, Ce, Ci, Co, Cu = __sixth_odd(A_4x, E_4x, Ca, Ce, Ci, Co, Cu, Da, De, Di, Do, Du); - - return A_4x, E_4x, Ca, Ce, Ci, Co, Cu; -} - -inline fn __theta_rho_pi_chi_iota( -reg ptr u256[25] A_4x, reg ptr u256[25] E_4x, inline int index, -reg u256 Ca, reg u256 Ce, reg u256 Ci, reg u256 Co, reg u256 Cu) --> reg ptr u256[25], reg ptr u256[25] -{ - reg u256 Da, De, Di, Do, Du; - - Da, De, Di, Do, Du = __first(Ca, Ce, Ci, Co, Cu); - - A_4x, E_4x = __second_last(A_4x, E_4x, index, Da, De, Di, Do, Du); - - A_4x, E_4x = __third_last(A_4x, E_4x, Da, De, Di, Do, Du); - - A_4x, E_4x = __fourth_last(A_4x, E_4x, Da, De, Di, Do, Du); - - A_4x, E_4x = __fifth_last(A_4x, E_4x, Da, De, Di, Do, Du); - - A_4x, E_4x = __sixth_last(A_4x, E_4x, Da, De, Di, Do, Du); - - return A_4x, E_4x; -} - -fn _KeccakF1600_StatePermute4x(reg ptr u256[25] A_4x) -> reg ptr u256[25] -{ - reg u256 Ca, Ce, Ci, Co, Cu; - - stack u256[25] E_4x; - - /** Rounds24 **/ - Ca, Ce, Ci, Co, Cu = __prepare_theta(A_4x); - A_4x, E_4x, Ca, Ce, Ci, Co, Cu = __theta_rho_pi_chi_iota_prepare_theta_even(A_4x, E_4x, 0, Ca, Ce, Ci, Co, Cu); - E_4x, A_4x, Ca, Ce, Ci, Co, Cu = __theta_rho_pi_chi_iota_prepare_theta_odd(E_4x, A_4x, 1, Ca, Ce, Ci, Co, Cu); - A_4x, E_4x, Ca, Ce, Ci, Co, Cu = __theta_rho_pi_chi_iota_prepare_theta_even(A_4x, E_4x, 2, Ca, Ce, Ci, Co, Cu); - E_4x, A_4x, Ca, Ce, Ci, Co, Cu = __theta_rho_pi_chi_iota_prepare_theta_odd(E_4x, A_4x, 3, Ca, Ce, Ci, Co, Cu); - A_4x, E_4x, Ca, Ce, Ci, Co, Cu = __theta_rho_pi_chi_iota_prepare_theta_even(A_4x, E_4x, 4, Ca, Ce, Ci, Co, Cu); - E_4x, A_4x, Ca, Ce, Ci, Co, Cu = __theta_rho_pi_chi_iota_prepare_theta_odd(E_4x, A_4x, 5, Ca, Ce, Ci, Co, Cu); - A_4x, E_4x, Ca, Ce, Ci, Co, Cu = __theta_rho_pi_chi_iota_prepare_theta_even(A_4x, E_4x, 6, Ca, Ce, Ci, Co, Cu); - E_4x, A_4x, Ca, Ce, Ci, Co, Cu = __theta_rho_pi_chi_iota_prepare_theta_odd(E_4x, A_4x, 7, Ca, Ce, Ci, Co, Cu); - A_4x, E_4x, Ca, Ce, Ci, Co, Cu = __theta_rho_pi_chi_iota_prepare_theta_even(A_4x, E_4x, 8, Ca, Ce, Ci, Co, Cu); - E_4x, A_4x, Ca, Ce, Ci, Co, Cu = __theta_rho_pi_chi_iota_prepare_theta_odd(E_4x, A_4x, 9, Ca, Ce, Ci, Co, Cu); - A_4x, E_4x, Ca, Ce, Ci, Co, Cu = __theta_rho_pi_chi_iota_prepare_theta_even(A_4x, E_4x, 10, Ca, Ce, Ci, Co, Cu); - E_4x, A_4x, Ca, Ce, Ci, Co, Cu = __theta_rho_pi_chi_iota_prepare_theta_odd(E_4x, A_4x, 11, Ca, Ce, Ci, Co, Cu); - A_4x, E_4x, Ca, Ce, Ci, Co, Cu = __theta_rho_pi_chi_iota_prepare_theta_even(A_4x, E_4x, 12, Ca, Ce, Ci, Co, Cu); - E_4x, A_4x, Ca, Ce, Ci, Co, Cu = __theta_rho_pi_chi_iota_prepare_theta_odd(E_4x, A_4x, 13, Ca, Ce, Ci, Co, Cu); - A_4x, E_4x, Ca, Ce, Ci, Co, Cu = __theta_rho_pi_chi_iota_prepare_theta_even(A_4x, E_4x, 14, Ca, Ce, Ci, Co, Cu); - E_4x, A_4x, Ca, Ce, Ci, Co, Cu = __theta_rho_pi_chi_iota_prepare_theta_odd(E_4x, A_4x, 15, Ca, Ce, Ci, Co, Cu); - A_4x, E_4x, Ca, Ce, Ci, Co, Cu = __theta_rho_pi_chi_iota_prepare_theta_even(A_4x, E_4x, 16, Ca, Ce, Ci, Co, Cu); - E_4x, A_4x, Ca, Ce, Ci, Co, Cu = __theta_rho_pi_chi_iota_prepare_theta_odd(E_4x, A_4x, 17, Ca, Ce, Ci, Co, Cu); - A_4x, E_4x, Ca, Ce, Ci, Co, Cu = __theta_rho_pi_chi_iota_prepare_theta_even(A_4x, E_4x, 18, Ca, Ce, Ci, Co, Cu); - E_4x, A_4x, Ca, Ce, Ci, Co, Cu = __theta_rho_pi_chi_iota_prepare_theta_odd(E_4x, A_4x, 19, Ca, Ce, Ci, Co, Cu); - A_4x, E_4x, Ca, Ce, Ci, Co, Cu = __theta_rho_pi_chi_iota_prepare_theta_even(A_4x, E_4x, 20, Ca, Ce, Ci, Co, Cu); - E_4x, A_4x, Ca, Ce, Ci, Co, Cu = __theta_rho_pi_chi_iota_prepare_theta_odd(E_4x, A_4x, 21, Ca, Ce, Ci, Co, Cu); - A_4x, E_4x, Ca, Ce, Ci, Co, Cu = __theta_rho_pi_chi_iota_prepare_theta_even(A_4x, E_4x, 22, Ca, Ce, Ci, Co, Cu); - E_4x, A_4x = __theta_rho_pi_chi_iota(E_4x, A_4x, 23, Ca, Ce, Ci, Co, Cu); - - - return A_4x; -} - - -fn _shake128_absorb4x_34(reg ptr u256[25] s, reg ptr u8[34] m0 m1 m2 m3) -> reg ptr u256[25] -{ - inline int i; - reg u256 t0 t1; - reg u16 t16; - reg u64 t64; - - for i = 0 to 25 - { - t0 = #set0_256(); - s[i] = t0; - } - - for i = 0 to 4 - { - t64 = m0[u64 i]; - s[u64 4 * i] ^= t64; - t64 = m1[u64 i]; - s[u64 4 * i + 1] ^= t64; - t64 = m2[u64 i]; - s[u64 4 * i + 2] ^= t64; - t64 = m3[u64 i]; - s[u64 4 * i + 3] ^= t64; - } - - t16 = m0.[u16 32]; - s[u16 64] ^= t16; - s[u8 130] ^= 0x1F; - - t16 = m1.[u16 32]; - s[u16 68] ^= t16; - s[u8 138] ^= 0x1F; - - t16 = m2.[u16 32]; - s[u16 72] ^= t16; - s[u8 146] ^= 0x1F; - - t16 = m3.[u16 32]; - s[u16 76] ^= t16; - s[u8 154] ^= 0x1F; - - t0 = shake_sep[u256 0]; - t1 = s[SHAKE128_RATE / 8 - 1]; - t0 = t0 ^ t1; - s[SHAKE128_RATE / 8 - 1] = t0; - - return s; -} - - -inline -fn __shake128_squeezeblock4x(reg ptr u256[25] state, reg ptr u8[SHAKE128_RATE] h0 h1 h2 h3) -> reg ptr u256[25], reg ptr u8[SHAKE128_RATE], reg ptr u8[SHAKE128_RATE], reg ptr u8[SHAKE128_RATE], reg ptr u8[SHAKE128_RATE] -{ - reg u256 t256; - reg u128 t128; - inline int i; - - state = _KeccakF1600_StatePermute4x(state); - - for i = 0 to (SHAKE128_RATE / 8) { - t256 = state[i]; - t128 = (128u)t256; - h0[u64 i] = #VMOVLPD(t128); - h1[u64 i] = #VMOVHPD(t128); - t128 = #VEXTRACTI128(t256, 1); - h2[u64 i] = #VMOVLPD(t128); - h3[u64 i] = #VMOVHPD(t128); - } - - return state, h0, h1, h2, h3; -} - - -fn _shake256_absorb4x_33(reg ptr u256[25] s, reg ptr u8[33] m0 m1 m2 m3) -> reg ptr u256[25] -{ - inline int i; - reg u256 t0 t1; - reg u64 t64; - reg u8 t8; - - for i = 0 to 25 - { - t0 = #set0_256(); - s[i] = t0; - } - - for i = 0 to 4 - { - t64 = m0[u64 i]; - s[u64 4 * i] ^= t64; - t64 = m1[u64 i]; - s[u64 4 * i + 1] ^= t64; - t64 = m2[u64 i]; - s[u64 4 * i + 2] ^= t64; - t64 = m3[u64 i]; - s[u64 4 * i + 3] ^= t64; - } - - t8 = m0[32]; - s[u8 128] ^= t8; - s[u8 129] ^= 0x1F; - - t8 = m1[32]; - s[u8 136] ^= t8; - s[u8 137] ^= 0x1F; - - t8 = m2[32]; - s[u8 144] ^= t8; - s[u8 145] ^= 0x1F; - - t8 = m3[32]; - s[u8 152] ^= t8; - s[u8 153] ^= 0x1F; - - t0 = shake_sep[u256 0]; - t1 = s[SHAKE256_RATE / 8 - 1]; - t0 = t0 ^ t1; - s[SHAKE256_RATE / 8 - 1] = t0; - - return s; -} - - -inline -fn __shake256_squeezeblock4x(reg ptr u256[25] state, reg ptr u8[SHAKE256_RATE] h0 h1 h2 h3) -> reg ptr u256[25], reg ptr u8[SHAKE256_RATE], reg ptr u8[SHAKE256_RATE], reg ptr u8[SHAKE256_RATE], reg ptr u8[SHAKE256_RATE] -{ - reg u256 t256; - reg u128 t128; - inline int i; - - state = _KeccakF1600_StatePermute4x(state); - - for i = 0 to (SHAKE256_RATE / 8) { - t256 = state[i]; - t128 = (128u)t256; - h0[u64 i] = #VMOVLPD(t128); - h1[u64 i] = #VMOVHPD(t128); - t128 = #VEXTRACTI128(t256, 1); - h2[u64 i] = #VMOVLPD(t128); - h3[u64 i] = #VMOVHPD(t128); - } - - return state, h0, h1, h2, h3; -} diff --git a/code/jasmin/avx2v/fips202_common.jinc b/code/jasmin/avx2v/fips202_common.jinc deleted file mode 100644 index 0ed82a08..00000000 --- a/code/jasmin/avx2v/fips202_common.jinc +++ /dev/null @@ -1,6 +0,0 @@ -param int SHAKE128_RATE = 168; -param int SHAKE256_RATE = 136; -param int SHA3_256_RATE = 136; -param int SHA3_512_RATE = 72; - -u64[4] shake_sep = {9223372036854775808, 9223372036854775808, 9223372036854775808, 9223372036854775808}; diff --git a/code/jasmin/avx2v/fq.S b/code/jasmin/avx2v/fq.S deleted file mode 100644 index d4c5c902..00000000 --- a/code/jasmin/avx2v/fq.S +++ /dev/null @@ -1,129 +0,0 @@ -#include "consts.h" -.include "fq.inc" - -.text -reduce128_avx: -#load -vmovdqa (%rdi),%ymm2 -vmovdqa 32(%rdi),%ymm3 -vmovdqa 64(%rdi),%ymm4 -vmovdqa 96(%rdi),%ymm5 -vmovdqa 128(%rdi),%ymm6 -vmovdqa 160(%rdi),%ymm7 -vmovdqa 192(%rdi),%ymm8 -vmovdqa 224(%rdi),%ymm9 - -red16 2,10 -red16 3,11 -red16 4,12 -red16 5,13 -red16 6,14 -red16 7,15 -red16 8,10 -red16 9,11 - -#store -vmovdqa %ymm2,(%rdi) -vmovdqa %ymm3,32(%rdi) -vmovdqa %ymm4,64(%rdi) -vmovdqa %ymm5,96(%rdi) -vmovdqa %ymm6,128(%rdi) -vmovdqa %ymm7,160(%rdi) -vmovdqa %ymm8,192(%rdi) -vmovdqa %ymm9,224(%rdi) - -ret - -.global cdecl(reduce_avx) -cdecl(reduce_avx): -#consts -vmovdqa _16XQ*2(%rsi),%ymm0 -vmovdqa _16XV*2(%rsi),%ymm1 -call reduce128_avx -add $256,%rdi -call reduce128_avx -ret - -csubq128_avx: -#load -vmovdqa (%rdi),%ymm1 -vmovdqa 32(%rdi),%ymm2 -vmovdqa 64(%rdi),%ymm3 -vmovdqa 96(%rdi),%ymm4 -vmovdqa 128(%rdi),%ymm5 -vmovdqa 160(%rdi),%ymm6 -vmovdqa 192(%rdi),%ymm7 -vmovdqa 224(%rdi),%ymm8 - -csubq 1,9 -csubq 2,10 -csubq 3,11 -csubq 4,12 -csubq 5,13 -csubq 6,14 -csubq 7,15 -csubq 8,9 - -#store -vmovdqa %ymm1,(%rdi) -vmovdqa %ymm2,32(%rdi) -vmovdqa %ymm3,64(%rdi) -vmovdqa %ymm4,96(%rdi) -vmovdqa %ymm5,128(%rdi) -vmovdqa %ymm6,160(%rdi) -vmovdqa %ymm7,192(%rdi) -vmovdqa %ymm8,224(%rdi) - -ret - -.global cdecl(csubq_avx) -cdecl(csubq_avx): -#consts -vmovdqa _16XQ*2(%rsi),%ymm0 -call csubq128_avx -add $256,%rdi -call csubq128_avx -ret - -tomont128_avx: -#load -vmovdqa (%rdi),%ymm3 -vmovdqa 32(%rdi),%ymm4 -vmovdqa 64(%rdi),%ymm5 -vmovdqa 96(%rdi),%ymm6 -vmovdqa 128(%rdi),%ymm7 -vmovdqa 160(%rdi),%ymm8 -vmovdqa 192(%rdi),%ymm9 -vmovdqa 224(%rdi),%ymm10 - -fqmulprecomp 1,2,3,11 -fqmulprecomp 1,2,4,12 -fqmulprecomp 1,2,5,13 -fqmulprecomp 1,2,6,14 -fqmulprecomp 1,2,7,15 -fqmulprecomp 1,2,8,11 -fqmulprecomp 1,2,9,12 -fqmulprecomp 1,2,10,13 - -#store -vmovdqa %ymm3,(%rdi) -vmovdqa %ymm4,32(%rdi) -vmovdqa %ymm5,64(%rdi) -vmovdqa %ymm6,96(%rdi) -vmovdqa %ymm7,128(%rdi) -vmovdqa %ymm8,160(%rdi) -vmovdqa %ymm9,192(%rdi) -vmovdqa %ymm10,224(%rdi) - -ret - -.global cdecl(tomont_avx) -cdecl(tomont_avx): -#consts -vmovdqa _16XQ*2(%rsi),%ymm0 -vmovdqa _16XMONTSQLO*2(%rsi),%ymm1 -vmovdqa _16XMONTSQHI*2(%rsi),%ymm2 -call tomont128_avx -add $256,%rdi -call tomont128_avx -ret diff --git a/code/jasmin/avx2v/fq.inc b/code/jasmin/avx2v/fq.inc deleted file mode 100644 index 4cb28a8e..00000000 --- a/code/jasmin/avx2v/fq.inc +++ /dev/null @@ -1,26 +0,0 @@ -.macro red16 r,x=12 -vpmulhw %ymm1,%ymm\r,%ymm\x -vpsraw $10,%ymm\x,%ymm\x -vpmullw %ymm0,%ymm\x,%ymm\x -vpsubw %ymm\x,%ymm\r,%ymm\r -.endm - -.macro csubq r,x=12 -vpsubw %ymm0,%ymm\r,%ymm\r -vpsraw $15,%ymm\r,%ymm\x -vpand %ymm0,%ymm\x,%ymm\x -vpaddw %ymm\x,%ymm\r,%ymm\r -.endm - -.macro caddq r,x=12 -vpsraw $15,%ymm\r,%ymm\x -vpand %ymm0,%ymm\x,%ymm\x -vpaddw %ymm\x,%ymm\r,%ymm\r -.endm - -.macro fqmulprecomp al,ah,b,x=12 -vpmullw %ymm\al,%ymm\b,%ymm\x -vpmulhw %ymm\ah,%ymm\b,%ymm\b -vpmulhw %ymm0,%ymm\x,%ymm\x -vpsubw %ymm\x,%ymm\b,%ymm\b -.endm diff --git a/code/jasmin/avx2v/gen_matrix.jazz b/code/jasmin/avx2v/gen_matrix.jazz deleted file mode 100644 index 3789bb29..00000000 --- a/code/jasmin/avx2v/gen_matrix.jazz +++ /dev/null @@ -1,59 +0,0 @@ -require "gen_matrix.jinc" -/* -require "gen_matrix_old.jinc" - -export fn gen_matrix_old_jazz(reg u64 ap, reg u64 seedp) -{ - stack u16[KYBER_K*KYBER_VECN] aa; - stack u8[KYBER_SYMBYTES] seed; - reg u8 c; - reg u16 t; - inline int i; - stack u64 sap; - - sap = ap; - - for i = 0 to KYBER_SYMBYTES - { - c = (u8)[seedp + i]; - seed[i] = c; - } - - aa = __gen_matrix_old(seed, 1); - - ap = sap; - - for i = 0 to KYBER_K*KYBER_VECN - { - t = aa[i]; - (u16)[ap + 2*i] = t; - } -} -*/ -export fn gen_matrix_jazz(reg u64 ap, reg u64 seedp) -{ - stack u16[KYBER_K*KYBER_VECN] aa; - stack u8[KYBER_SYMBYTES] seed; - reg u8 c; - reg u16 t; - inline int i; - stack u64 sap; - - sap = ap; - - for i = 0 to KYBER_SYMBYTES - { - c = (u8)[seedp + i]; - seed[i] = c; - } - - aa = __gen_matrix(seed, 1); - - ap = sap; - - for i = 0 to KYBER_K*KYBER_VECN - { - t = aa[i]; - (u16)[ap + 2*i] = t; - } -} diff --git a/code/jasmin/avx2v/gen_matrix.jinc b/code/jasmin/avx2v/gen_matrix.jinc deleted file mode 100644 index 6e07b4b7..00000000 --- a/code/jasmin/avx2v/gen_matrix.jinc +++ /dev/null @@ -1,137 +0,0 @@ -require "params.jinc" -require "shuffle.jinc" -require "fips202.jinc" -require "params.jinc" - -inline -fn __rej_uniform(stack u16[KYBER_N] rp, reg u64 offset, stack u8[SHAKE128_RATE] buf) -> reg u64, stack u16[KYBER_N] -{ - reg u16 val1 val2; - reg u16 t; - reg u64 pos ctr; - reg u64 cnd0 cnd1 exit; - - - ctr = offset; - pos = 0; - exit = 0; - - while(exit == 0) - { - val1 = (16u)buf[(int)pos]; - pos += 1; - t = (16u)buf[(int)pos]; - val2 = t; - val2 >>= 4; - t &= 0x0F; - t <<= 8; - val1 |= t; - pos += 1; - - t = (16u)buf[(int)pos]; - t <<= 4; - val2 |= t; - pos += 1; - - if(val1 < KYBER_Q) - { - rp[(int)ctr] = val1; - ctr += 1; - } - - if(val2 < KYBER_Q) - { - if(ctr < KYBER_N) - { - rp[(int)ctr] = val2; - ctr += 1; - } - } - - // Check if we should exit the loop - cnd0 = KYBER_N; - cnd0 -= ctr; - cnd0 -= 1; - cnd1 = SHAKE128_RATE; - cnd1 -= pos; - cnd1 -= 3; //TODO: (potentially) wasting 2 'good' bytes - exit = cnd0 | cnd1; - exit >>= 63; - } - - return ctr, rp; -} - -inline -fn __gen_matrix(stack u8[KYBER_SYMBYTES] seed, reg u64 transposed) -> stack u16[KYBER_K*KYBER_VECN] -{ - stack u8[34] extseed; - stack u8[SHAKE128_RATE] buf; - stack u64[25] state; - stack u16[KYBER_N] poly; - stack u16[KYBER_K*KYBER_VECN] r; - - reg u8 c; - reg u16 t; - reg u64 ctr k l; - stack u64 sctr; - stack u64 stransposed; - inline int j i; - - stransposed = transposed; - - for j = 0 to KYBER_SYMBYTES - { - c = seed[j]; - extseed[j] = c; - } - - for i=0 to KYBER_K - { - for j = 0 to KYBER_K - { - transposed = stransposed; - if(transposed == 0) - { - extseed[KYBER_SYMBYTES] = j; - extseed[KYBER_SYMBYTES+1] = i; - } - else - { - extseed[KYBER_SYMBYTES] = i; - extseed[KYBER_SYMBYTES+1] = j; - } - - state = _shake128_absorb34(state, extseed); - - ctr = 0; - while (ctr < KYBER_N) - { - sctr = ctr; - state, buf = _shake128_squeezeblock(state, buf); - ctr = sctr; - ctr, poly = __rej_uniform(poly, ctr, buf); - } - - k = 0; - l = i * KYBER_VECN + j * KYBER_N; - while (k < KYBER_N) - { - t = poly[(int) k]; - r[(int) l] = t; - k += 1; - l += 1; - } - } - } - - for i = 0 to KYBER_K - { - for j = 0 to KYBER_K - { - r[i*KYBER_VECN+j*KYBER_N:KYBER_N] = _nttunpack(r[i*KYBER_VECN+j*KYBER_N:KYBER_N]); - } - } - - return r; -} diff --git a/code/jasmin/avx2v/gen_matrix.jinc.try0 b/code/jasmin/avx2v/gen_matrix.jinc.try0 deleted file mode 100644 index 9c3f758d..00000000 --- a/code/jasmin/avx2v/gen_matrix.jinc.try0 +++ /dev/null @@ -1,940 +0,0 @@ -/** -benchmarks with this file - - our / supercop -key 83712/ 79134 skylake -enc 96680/ 74866 -dec 83562/ 65006 - -key 74472/ 71588 haswell -enc 89524/ 72472 -dec 77096/ 61512 - -key 93730/ 91723 alderlake -enc 109251/ 85250 -dec 97006/ 73901 - -**/ - -require "params.jinc" -require "consts.jinc" -require "shuffle.jinc" -require "fips202.jinc" -require "fips202_4x.jinc" - -param int GENMATRIX_NBLOCKS = ((12*KYBER_N/8*4096/KYBER_Q + SHAKE128_RATE)/SHAKE128_RATE); -param int REJ_UNIFORM_AVX_BUFLEN = GENMATRIX_NBLOCKS * SHAKE128_RATE; - -param int USE_AVX2_REJECTION = 0; -param int USE_SQUEEZE_N = 0; - -u8[2048] ru_idx = {-1, -1, -1, -1, -1, -1, -1, -1, - 0, -1, -1, -1, -1, -1, -1, -1, - 2, -1, -1, -1, -1, -1, -1, -1, - 0, 2, -1, -1, -1, -1, -1, -1, - 4, -1, -1, -1, -1, -1, -1, -1, - 0, 4, -1, -1, -1, -1, -1, -1, - 2, 4, -1, -1, -1, -1, -1, -1, - 0, 2, 4, -1, -1, -1, -1, -1, - 6, -1, -1, -1, -1, -1, -1, -1, - 0, 6, -1, -1, -1, -1, -1, -1, - 2, 6, -1, -1, -1, -1, -1, -1, - 0, 2, 6, -1, -1, -1, -1, -1, - 4, 6, -1, -1, -1, -1, -1, -1, - 0, 4, 6, -1, -1, -1, -1, -1, - 2, 4, 6, -1, -1, -1, -1, -1, - 0, 2, 4, 6, -1, -1, -1, -1, - 8, -1, -1, -1, -1, -1, -1, -1, - 0, 8, -1, -1, -1, -1, -1, -1, - 2, 8, -1, -1, -1, -1, -1, -1, - 0, 2, 8, -1, -1, -1, -1, -1, - 4, 8, -1, -1, -1, -1, -1, -1, - 0, 4, 8, -1, -1, -1, -1, -1, - 2, 4, 8, -1, -1, -1, -1, -1, - 0, 2, 4, 8, -1, -1, -1, -1, - 6, 8, -1, -1, -1, -1, -1, -1, - 0, 6, 8, -1, -1, -1, -1, -1, - 2, 6, 8, -1, -1, -1, -1, -1, - 0, 2, 6, 8, -1, -1, -1, -1, - 4, 6, 8, -1, -1, -1, -1, -1, - 0, 4, 6, 8, -1, -1, -1, -1, - 2, 4, 6, 8, -1, -1, -1, -1, - 0, 2, 4, 6, 8, -1, -1, -1, - 10, -1, -1, -1, -1, -1, -1, -1, - 0, 10, -1, -1, -1, -1, -1, -1, - 2, 10, -1, -1, -1, -1, -1, -1, - 0, 2, 10, -1, -1, -1, -1, -1, - 4, 10, -1, -1, -1, -1, -1, -1, - 0, 4, 10, -1, -1, -1, -1, -1, - 2, 4, 10, -1, -1, -1, -1, -1, - 0, 2, 4, 10, -1, -1, -1, -1, - 6, 10, -1, -1, -1, -1, -1, -1, - 0, 6, 10, -1, -1, -1, -1, -1, - 2, 6, 10, -1, -1, -1, -1, -1, - 0, 2, 6, 10, -1, -1, -1, -1, - 4, 6, 10, -1, -1, -1, -1, -1, - 0, 4, 6, 10, -1, -1, -1, -1, - 2, 4, 6, 10, -1, -1, -1, -1, - 0, 2, 4, 6, 10, -1, -1, -1, - 8, 10, -1, -1, -1, -1, -1, -1, - 0, 8, 10, -1, -1, -1, -1, -1, - 2, 8, 10, -1, -1, -1, -1, -1, - 0, 2, 8, 10, -1, -1, -1, -1, - 4, 8, 10, -1, -1, -1, -1, -1, - 0, 4, 8, 10, -1, -1, -1, -1, - 2, 4, 8, 10, -1, -1, -1, -1, - 0, 2, 4, 8, 10, -1, -1, -1, - 6, 8, 10, -1, -1, -1, -1, -1, - 0, 6, 8, 10, -1, -1, -1, -1, - 2, 6, 8, 10, -1, -1, -1, -1, - 0, 2, 6, 8, 10, -1, -1, -1, - 4, 6, 8, 10, -1, -1, -1, -1, - 0, 4, 6, 8, 10, -1, -1, -1, - 2, 4, 6, 8, 10, -1, -1, -1, - 0, 2, 4, 6, 8, 10, -1, -1, - 12, -1, -1, -1, -1, -1, -1, -1, - 0, 12, -1, -1, -1, -1, -1, -1, - 2, 12, -1, -1, -1, -1, -1, -1, - 0, 2, 12, -1, -1, -1, -1, -1, - 4, 12, -1, -1, -1, -1, -1, -1, - 0, 4, 12, -1, -1, -1, -1, -1, - 2, 4, 12, -1, -1, -1, -1, -1, - 0, 2, 4, 12, -1, -1, -1, -1, - 6, 12, -1, -1, -1, -1, -1, -1, - 0, 6, 12, -1, -1, -1, -1, -1, - 2, 6, 12, -1, -1, -1, -1, -1, - 0, 2, 6, 12, -1, -1, -1, -1, - 4, 6, 12, -1, -1, -1, -1, -1, - 0, 4, 6, 12, -1, -1, -1, -1, - 2, 4, 6, 12, -1, -1, -1, -1, - 0, 2, 4, 6, 12, -1, -1, -1, - 8, 12, -1, -1, -1, -1, -1, -1, - 0, 8, 12, -1, -1, -1, -1, -1, - 2, 8, 12, -1, -1, -1, -1, -1, - 0, 2, 8, 12, -1, -1, -1, -1, - 4, 8, 12, -1, -1, -1, -1, -1, - 0, 4, 8, 12, -1, -1, -1, -1, - 2, 4, 8, 12, -1, -1, -1, -1, - 0, 2, 4, 8, 12, -1, -1, -1, - 6, 8, 12, -1, -1, -1, -1, -1, - 0, 6, 8, 12, -1, -1, -1, -1, - 2, 6, 8, 12, -1, -1, -1, -1, - 0, 2, 6, 8, 12, -1, -1, -1, - 4, 6, 8, 12, -1, -1, -1, -1, - 0, 4, 6, 8, 12, -1, -1, -1, - 2, 4, 6, 8, 12, -1, -1, -1, - 0, 2, 4, 6, 8, 12, -1, -1, - 10, 12, -1, -1, -1, -1, -1, -1, - 0, 10, 12, -1, -1, -1, -1, -1, - 2, 10, 12, -1, -1, -1, -1, -1, - 0, 2, 10, 12, -1, -1, -1, -1, - 4, 10, 12, -1, -1, -1, -1, -1, - 0, 4, 10, 12, -1, -1, -1, -1, - 2, 4, 10, 12, -1, -1, -1, -1, - 0, 2, 4, 10, 12, -1, -1, -1, - 6, 10, 12, -1, -1, -1, -1, -1, - 0, 6, 10, 12, -1, -1, -1, -1, - 2, 6, 10, 12, -1, -1, -1, -1, - 0, 2, 6, 10, 12, -1, -1, -1, - 4, 6, 10, 12, -1, -1, -1, -1, - 0, 4, 6, 10, 12, -1, -1, -1, - 2, 4, 6, 10, 12, -1, -1, -1, - 0, 2, 4, 6, 10, 12, -1, -1, - 8, 10, 12, -1, -1, -1, -1, -1, - 0, 8, 10, 12, -1, -1, -1, -1, - 2, 8, 10, 12, -1, -1, -1, -1, - 0, 2, 8, 10, 12, -1, -1, -1, - 4, 8, 10, 12, -1, -1, -1, -1, - 0, 4, 8, 10, 12, -1, -1, -1, - 2, 4, 8, 10, 12, -1, -1, -1, - 0, 2, 4, 8, 10, 12, -1, -1, - 6, 8, 10, 12, -1, -1, -1, -1, - 0, 6, 8, 10, 12, -1, -1, -1, - 2, 6, 8, 10, 12, -1, -1, -1, - 0, 2, 6, 8, 10, 12, -1, -1, - 4, 6, 8, 10, 12, -1, -1, -1, - 0, 4, 6, 8, 10, 12, -1, -1, - 2, 4, 6, 8, 10, 12, -1, -1, - 0, 2, 4, 6, 8, 10, 12, -1, - 14, -1, -1, -1, -1, -1, -1, -1, - 0, 14, -1, -1, -1, -1, -1, -1, - 2, 14, -1, -1, -1, -1, -1, -1, - 0, 2, 14, -1, -1, -1, -1, -1, - 4, 14, -1, -1, -1, -1, -1, -1, - 0, 4, 14, -1, -1, -1, -1, -1, - 2, 4, 14, -1, -1, -1, -1, -1, - 0, 2, 4, 14, -1, -1, -1, -1, - 6, 14, -1, -1, -1, -1, -1, -1, - 0, 6, 14, -1, -1, -1, -1, -1, - 2, 6, 14, -1, -1, -1, -1, -1, - 0, 2, 6, 14, -1, -1, -1, -1, - 4, 6, 14, -1, -1, -1, -1, -1, - 0, 4, 6, 14, -1, -1, -1, -1, - 2, 4, 6, 14, -1, -1, -1, -1, - 0, 2, 4, 6, 14, -1, -1, -1, - 8, 14, -1, -1, -1, -1, -1, -1, - 0, 8, 14, -1, -1, -1, -1, -1, - 2, 8, 14, -1, -1, -1, -1, -1, - 0, 2, 8, 14, -1, -1, -1, -1, - 4, 8, 14, -1, -1, -1, -1, -1, - 0, 4, 8, 14, -1, -1, -1, -1, - 2, 4, 8, 14, -1, -1, -1, -1, - 0, 2, 4, 8, 14, -1, -1, -1, - 6, 8, 14, -1, -1, -1, -1, -1, - 0, 6, 8, 14, -1, -1, -1, -1, - 2, 6, 8, 14, -1, -1, -1, -1, - 0, 2, 6, 8, 14, -1, -1, -1, - 4, 6, 8, 14, -1, -1, -1, -1, - 0, 4, 6, 8, 14, -1, -1, -1, - 2, 4, 6, 8, 14, -1, -1, -1, - 0, 2, 4, 6, 8, 14, -1, -1, - 10, 14, -1, -1, -1, -1, -1, -1, - 0, 10, 14, -1, -1, -1, -1, -1, - 2, 10, 14, -1, -1, -1, -1, -1, - 0, 2, 10, 14, -1, -1, -1, -1, - 4, 10, 14, -1, -1, -1, -1, -1, - 0, 4, 10, 14, -1, -1, -1, -1, - 2, 4, 10, 14, -1, -1, -1, -1, - 0, 2, 4, 10, 14, -1, -1, -1, - 6, 10, 14, -1, -1, -1, -1, -1, - 0, 6, 10, 14, -1, -1, -1, -1, - 2, 6, 10, 14, -1, -1, -1, -1, - 0, 2, 6, 10, 14, -1, -1, -1, - 4, 6, 10, 14, -1, -1, -1, -1, - 0, 4, 6, 10, 14, -1, -1, -1, - 2, 4, 6, 10, 14, -1, -1, -1, - 0, 2, 4, 6, 10, 14, -1, -1, - 8, 10, 14, -1, -1, -1, -1, -1, - 0, 8, 10, 14, -1, -1, -1, -1, - 2, 8, 10, 14, -1, -1, -1, -1, - 0, 2, 8, 10, 14, -1, -1, -1, - 4, 8, 10, 14, -1, -1, -1, -1, - 0, 4, 8, 10, 14, -1, -1, -1, - 2, 4, 8, 10, 14, -1, -1, -1, - 0, 2, 4, 8, 10, 14, -1, -1, - 6, 8, 10, 14, -1, -1, -1, -1, - 0, 6, 8, 10, 14, -1, -1, -1, - 2, 6, 8, 10, 14, -1, -1, -1, - 0, 2, 6, 8, 10, 14, -1, -1, - 4, 6, 8, 10, 14, -1, -1, -1, - 0, 4, 6, 8, 10, 14, -1, -1, - 2, 4, 6, 8, 10, 14, -1, -1, - 0, 2, 4, 6, 8, 10, 14, -1, - 12, 14, -1, -1, -1, -1, -1, -1, - 0, 12, 14, -1, -1, -1, -1, -1, - 2, 12, 14, -1, -1, -1, -1, -1, - 0, 2, 12, 14, -1, -1, -1, -1, - 4, 12, 14, -1, -1, -1, -1, -1, - 0, 4, 12, 14, -1, -1, -1, -1, - 2, 4, 12, 14, -1, -1, -1, -1, - 0, 2, 4, 12, 14, -1, -1, -1, - 6, 12, 14, -1, -1, -1, -1, -1, - 0, 6, 12, 14, -1, -1, -1, -1, - 2, 6, 12, 14, -1, -1, -1, -1, - 0, 2, 6, 12, 14, -1, -1, -1, - 4, 6, 12, 14, -1, -1, -1, -1, - 0, 4, 6, 12, 14, -1, -1, -1, - 2, 4, 6, 12, 14, -1, -1, -1, - 0, 2, 4, 6, 12, 14, -1, -1, - 8, 12, 14, -1, -1, -1, -1, -1, - 0, 8, 12, 14, -1, -1, -1, -1, - 2, 8, 12, 14, -1, -1, -1, -1, - 0, 2, 8, 12, 14, -1, -1, -1, - 4, 8, 12, 14, -1, -1, -1, -1, - 0, 4, 8, 12, 14, -1, -1, -1, - 2, 4, 8, 12, 14, -1, -1, -1, - 0, 2, 4, 8, 12, 14, -1, -1, - 6, 8, 12, 14, -1, -1, -1, -1, - 0, 6, 8, 12, 14, -1, -1, -1, - 2, 6, 8, 12, 14, -1, -1, -1, - 0, 2, 6, 8, 12, 14, -1, -1, - 4, 6, 8, 12, 14, -1, -1, -1, - 0, 4, 6, 8, 12, 14, -1, -1, - 2, 4, 6, 8, 12, 14, -1, -1, - 0, 2, 4, 6, 8, 12, 14, -1, - 10, 12, 14, -1, -1, -1, -1, -1, - 0, 10, 12, 14, -1, -1, -1, -1, - 2, 10, 12, 14, -1, -1, -1, -1, - 0, 2, 10, 12, 14, -1, -1, -1, - 4, 10, 12, 14, -1, -1, -1, -1, - 0, 4, 10, 12, 14, -1, -1, -1, - 2, 4, 10, 12, 14, -1, -1, -1, - 0, 2, 4, 10, 12, 14, -1, -1, - 6, 10, 12, 14, -1, -1, -1, -1, - 0, 6, 10, 12, 14, -1, -1, -1, - 2, 6, 10, 12, 14, -1, -1, -1, - 0, 2, 6, 10, 12, 14, -1, -1, - 4, 6, 10, 12, 14, -1, -1, -1, - 0, 4, 6, 10, 12, 14, -1, -1, - 2, 4, 6, 10, 12, 14, -1, -1, - 0, 2, 4, 6, 10, 12, 14, -1, - 8, 10, 12, 14, -1, -1, -1, -1, - 0, 8, 10, 12, 14, -1, -1, -1, - 2, 8, 10, 12, 14, -1, -1, -1, - 0, 2, 8, 10, 12, 14, -1, -1, - 4, 8, 10, 12, 14, -1, -1, -1, - 0, 4, 8, 10, 12, 14, -1, -1, - 2, 4, 8, 10, 12, 14, -1, -1, - 0, 2, 4, 8, 10, 12, 14, -1, - 6, 8, 10, 12, 14, -1, -1, -1, - 0, 6, 8, 10, 12, 14, -1, -1, - 2, 6, 8, 10, 12, 14, -1, -1, - 0, 2, 6, 8, 10, 12, 14, -1, - 4, 6, 8, 10, 12, 14, -1, -1, - 0, 4, 6, 8, 10, 12, 14, -1, - 2, 4, 6, 8, 10, 12, 14, -1, - 0, 2, 4, 6, 8, 10, 12, 14}; - -inline -fn __shake128_squeezenblocks(stack u64[25] state, stack u8[REJ_UNIFORM_AVX_BUFLEN] out) - -> stack u64[25], stack u8[REJ_UNIFORM_AVX_BUFLEN] -{ - inline int i; - - for i = 0 to GENMATRIX_NBLOCKS - { - state, out[i*SHAKE128_RATE:SHAKE128_RATE] = _shake128_squeezeblock(state, out[i*SHAKE128_RATE:SHAKE128_RATE]); - } - return state, out; -} - -inline -fn __shake128_squeezenblocks4x(reg ptr u256[25] state, reg ptr u8[REJ_UNIFORM_AVX_BUFLEN] h0 h1 h2 h3) - -> reg ptr u256[25], reg ptr u8[REJ_UNIFORM_AVX_BUFLEN], reg ptr u8[REJ_UNIFORM_AVX_BUFLEN], reg ptr u8[REJ_UNIFORM_AVX_BUFLEN], reg ptr u8[REJ_UNIFORM_AVX_BUFLEN] -{ - inline int i; - - for i = 0 to GENMATRIX_NBLOCKS - { - state, h0[i*SHAKE128_RATE:SHAKE128_RATE], h1[i*SHAKE128_RATE:SHAKE128_RATE], h2[i*SHAKE128_RATE:SHAKE128_RATE], h3[i*SHAKE128_RATE:SHAKE128_RATE] = __shake128_squeezeblock4x(state, h0[i*SHAKE128_RATE:SHAKE128_RATE], h1[i*SHAKE128_RATE:SHAKE128_RATE], h2[i*SHAKE128_RATE:SHAKE128_RATE], h3[i*SHAKE128_RATE:SHAKE128_RATE]); - } - - return state, h0, h1, h2, h3; -} - -inline -fn __rej_uniform(reg ptr u16[KYBER_N] rp, reg u64 offset, reg ptr u8[SHAKE128_RATE] buf, inline int buflen) -> reg u64, stack u16[KYBER_N] -{ - reg u16 val0 val1; - reg u16 t; - reg u64 pos ctr; - reg u8 fl1 fl2; - reg bool cf zf b; - - ctr = offset; - pos = 0; - - _, cf, _, _, zf = #CMP_64(ctr, KYBER_N - 1); - fl1 = #SETcc(cf || zf); //SETBE - - _, cf, _, _, zf = #CMP_64(pos, buflen - 3); - fl2 = #SETcc(cf || zf); //SETBE - - _, _, _, _, b = #TEST_8(fl1, fl2); - - while(!b) - { - val0 = (16u)buf[(int)pos]; - pos += 1; - - t = (16u)buf[(int)pos]; - val1 = t; - val1 >>= 4; - - t &= 0x0F; - t <<= 8; - val0 |= t; - pos += 1; - - t = (16u)buf[(int)pos]; - t <<= 4; - val1 |= t; - pos += 1; - - if(val0 < KYBER_Q) - { - rp[(int)ctr] = val0; - ctr += 1; - } - - if(ctr < KYBER_N) - { - if(val1 < KYBER_Q) - { - rp[(int)ctr] = val1; - ctr += 1; - } - } - - _, cf, _, _, zf = #CMP_64(ctr, KYBER_N - 1); - fl1 = #SETcc(cf || zf); //SETBE - - _, cf, _, _, zf = #CMP_64(pos, buflen - 3); - fl2 = #SETcc(cf || zf); //SETBE - - _, _, _, _, b = #TEST_8(fl1, fl2); - } - - return ctr, rp; -} - -fn _rej_uniformn(reg ptr u16[KYBER_N] rp, reg ptr u8[REJ_UNIFORM_AVX_BUFLEN] buf) -> reg u64, reg ptr u16[KYBER_N] -{ - reg u16 val0 val1; - reg u16 t; - reg u64 pos ctr; - reg u8 fl1 fl2; - reg bool b; - - ctr = 0; - pos = 0; - - ?{ "==" = b } = #CMP_64(pos, 1); - - while(!b) - { - val0 = (16u)buf[(int)pos]; - pos += 1; - - t = (16u)buf[(int)pos]; - val1 = t; - val1 >>= 4; - - t &= 0x0F; - t <<= 8; - val0 |= t; - pos += 1; - - t = (16u)buf[(int)pos]; - t <<= 4; - val1 |= t; - pos += 1; - - if(val0 < KYBER_Q) - { - rp[(int)ctr] = val0; - ctr += 1; - } - - if(ctr < KYBER_N) - { - if(val1 < KYBER_Q) - { - rp[(int)ctr] = val1; - ctr += 1; - } - } - - ?{ "<=u" = b } = #CMP_64(ctr, KYBER_N - 1); - fl1 = #SETcc(b); - - ?{ "<=u" = b } = #CMP_64(pos, REJ_UNIFORM_AVX_BUFLEN - 3); - fl2 = #SETcc(b); - - _, _, _, _, b = #TEST_8(fl1, fl2); - } - - return ctr, rp; -} - -u8 ru_ones_s = 1; -u16 ru_mask_s = 0x0FFF; -u8[32] ru_idx8_s = {0, 1, 1, 2, 3, 4, 4, 5, - 6, 7, 7, 8, 9, 10, 10, 11, - 4, 5, 5, 6, 7, 8, 8, 9, - 10, 11, 11, 12, 13, 14, 14, 15}; - -fn _rej_uniform_avx(reg ptr u16[KYBER_N] rp, reg ptr u8[REJ_UNIFORM_AVX_BUFLEN] buf) -> reg u64, reg ptr u16[KYBER_N] -{ - reg u256 f0 f1 g0 g1 g2 g3; - reg u256 bound ones mask idx8; - reg u128 f t l h; - reg u64 pos ctr t64 t64_1 t64_2 t64_3; - reg u64 good; - reg u16 val0 val1 t16; - reg ptr u8[2048] idxp; - reg u8 fl1 fl2; - reg bool cf zf b; - - idxp = ru_idx; - - bound = jqx16[u256 0]; - ctr = 0; - pos = 0; - ones = #VPBROADCAST_32u8(ru_ones_s); - mask = #VPBROADCAST_16u16(ru_mask_s); - idx8 = ru_idx8_s[u256 0]; - - _, cf, _, _, zf = #CMP_64(ctr, KYBER_N - 32); - fl1 = #SETcc(cf || zf); - - _, cf, _, _, zf = #CMP_64(pos, REJ_UNIFORM_AVX_BUFLEN - 48); - fl2 = #SETcc(cf || zf); - - _, _, _, _, b = #TEST_8(fl1, fl2); - - while(!b) - { - f0 = #VPERMQ(buf.[u256 (int)pos], 0x94); - f1 = #VPERMQ(buf.[u256 24 + (int)pos], 0x94); - f0 = #VPSHUFB_256(f0, idx8); - f1 = #VPSHUFB_256(f1, idx8); - g0 = #VPSRL_16u16(f0, 4); - g1 = #VPSRL_16u16(f1, 4); - f0 = #VPBLEND_16u16(f0, g0, 0xAA); - f1 = #VPBLEND_16u16(f1, g1, 0xAA); - f0 = #VPAND_256(f0, mask); - f1 = #VPAND_256(f1, mask); - - g0 = #VPCMPGT_16u16(bound, f0); - g1 = #VPCMPGT_16u16(bound, f1); - - g0 = #VPACKSS_16u16(g0, g1); - good = #VPMOVMSKB_u256u64(g0); - - t64 = good; - t64 &= 0xFF; - g0 = (256u) #VMOV(idxp[u64 (int)t64]); - - t64_1 = good; - t64_1 >>= 16; - t64_1 &= 0xFF; - l = #VMOV(idxp[u64 (int)t64_1]); - - t64_2 = good; - t64_2 >>= 8; - t64_2 &= 0xFF; - g1 = (256u) #VMOV(idxp[u64 (int)t64_2]); - - t64_3 = good; - t64_3 >>= 24; - t64_3 &= 0xFF; - h = #VMOV(idxp[u64 (int)t64_3]); - - g0 = #VINSERTI128(g0, l, 1); - - _, _, _, _, _, t64 = #POPCNT_64(t64); - _, _, _, _, _, t64_1 = #POPCNT_64(t64_1); - t64 += ctr; - - g1 = #VINSERTI128(g1, h, 1); - - t64_1 += t64; - _, _, _, _, _, t64_2 = #POPCNT_64(t64_2); - t64_2 += t64_1; - _, _, _, _, _, t64_3 = #POPCNT_64(t64_3); - t64_3 += t64_2; - - g2 = #VPADD_32u8(g0, ones); - g0 = #VPUNPCKL_32u8(g0, g2); - g3 = #VPADD_32u8(g1, ones); - g1 = #VPUNPCKL_32u8(g1, g3); - - f0 = #VPSHUFB_256(f0, g0); - f1 = #VPSHUFB_256(f1, g1); - - rp.[u128 2*(int)ctr] = (128u)f0; - rp.[u128 2*(int)t64] = #VEXTRACTI128(f0, 1); - rp.[u128 2*(int)t64_1] = (128u)f1; - rp.[u128 2*(int)t64_2] = #VEXTRACTI128(f1, 1); - - ctr = t64_3; - - _, cf, _, _, zf = #CMP_64(ctr, KYBER_N - 32); - fl1 = #SETcc(cf || zf); - - pos += 48; - _, cf, _, _, zf = #CMP_64(pos, REJ_UNIFORM_AVX_BUFLEN - 48); - fl2 = #SETcc(cf || zf); - - _, _, _, _, b = #TEST_8(fl1, fl2); - } - - _, cf, _, _, zf = #CMP_64(ctr, KYBER_N - 8); - fl1 = #SETcc(cf || zf); - - _, cf, _, _, zf = #CMP_64(pos, REJ_UNIFORM_AVX_BUFLEN - 12); - fl2 = #SETcc(cf || zf); - - _, _, _, _, b = #TEST_8(fl1, fl2); - - t64 = 0x5555; - while(!b) - { - f = buf.[u128 (int)pos]; - f = #VPSHUFB_128(f, idx8); - t = #VPSRL_8u16(f, 4); - f = #VPBLEND_8u16(f, t, 0xAA); - f = #VPAND_128(f, mask); - - t = #VPCMPGT_8u16(bound, f); - good = #VPMOVMSKB_u128u64(t); - - good = #PEXT_64(good, t64); - l = #VMOV(idxp[u64 (int)good]); - _, _, _, _, _, good = #POPCNT_64(good); - - h = #VPADD_16u8(l, ones); - l = #VPUNPCKL_16u8(l, h); - f = #VPSHUFB_128(f, l); - - rp.[u128 2*(int)ctr] = f; - ctr += good; - - pos += 12; - _, cf, _, _, zf = #CMP_64(ctr, KYBER_N - 8); - fl1 = #SETcc(cf || zf); - - _, cf, _, _, zf = #CMP_64(pos, REJ_UNIFORM_AVX_BUFLEN - 12); - fl2 = #SETcc(cf || zf); - - _, _, _, _, b = #TEST_8(fl1, fl2); - } - - _, cf, _, _, zf = #CMP_64(ctr, KYBER_N - 1); - fl1 = #SETcc(cf || zf); - - _, cf, _, _, zf = #CMP_64(pos, REJ_UNIFORM_AVX_BUFLEN - 3); - fl2 = #SETcc(cf || zf); - - _, _, _, _, b = #TEST_8(fl1, fl2); - - while(!b) - { - val0 = (16u)buf[(int)pos]; - pos += 1; - t16 = (16u)buf[(int)pos]; - pos += 1; - val1 = t16; - - t16 <<= 8; - val0 |= t16; - val0 &= 0xFFF; - - val1 >>= 4; - t16 = (16u)buf[(int)pos]; - pos += 1; - t16 <<= 4; - val1 |= t16; - - if(val0 < KYBER_Q) - { - rp[(int)ctr] = val0; - ctr += 1; - } - if(val1 < KYBER_Q) - { - if(ctr < KYBER_N) - { - rp[(int)ctr] = val1; - ctr += 1; - } - } - - _, cf, _, _, zf = #CMP_64(ctr, KYBER_N - 1); - fl1 = #SETcc(cf || zf); //SETBE - - _, cf, _, _, zf = #CMP_64(pos, REJ_UNIFORM_AVX_BUFLEN - 3); - fl2 = #SETcc(cf || zf); //SETBE - - _, _, _, _, b = #TEST_8(fl1, fl2); - } - - return ctr, rp; -} - - -inline fn __r2s(reg u256 f) -> stack u256 { - stack u256 fs; - fs = f; - return f; -} - - -inline fn __s2r(stack u256 fs) -> reg u256 { - reg u256 f; - f = fs; - return f; -} - - -inline -fn __gen_matrix(stack u8[KYBER_SYMBYTES] seed, inline int transposed) -> stack u16[KYBER_K*KYBER_VECN] -{ - stack u8[REJ_UNIFORM_AVX_BUFLEN] buf0; - stack u8[REJ_UNIFORM_AVX_BUFLEN] buf1; - stack u8[REJ_UNIFORM_AVX_BUFLEN] buf2; - stack u8[REJ_UNIFORM_AVX_BUFLEN] buf3; - stack u256[25] state; - stack u16[KYBER_K*KYBER_VECN] rr; - stack u256 fs; - reg u256 f; - reg u64 ctr0 ctr1 ctr2 ctr3 tmp; - stack u64 ctr0_s; - reg u8 flg0 flg1 bflg; - reg bool cf zf; - - inline int i, j; - - f = seed[u256 0]; - buf0[u256 0] = f; - buf1[u256 0] = f; - buf2[u256 0] = f; - buf3[u256 0] = f; - fs = __r2s(f); - - if(transposed == 1) - { - buf0[KYBER_SYMBYTES] = 0; - buf0[KYBER_SYMBYTES+1] = 0; - buf1[KYBER_SYMBYTES] = 0; - buf1[KYBER_SYMBYTES+1] = 1; - buf2[KYBER_SYMBYTES] = 0; - buf2[KYBER_SYMBYTES+1] = 2; - buf3[KYBER_SYMBYTES] = 1; - buf3[KYBER_SYMBYTES+1] = 0; - } - else - { - buf0[KYBER_SYMBYTES] = 0; - buf0[KYBER_SYMBYTES+1] = 0; - buf1[KYBER_SYMBYTES] = 1; - buf1[KYBER_SYMBYTES+1] = 0; - buf2[KYBER_SYMBYTES] = 2; - buf2[KYBER_SYMBYTES+1] = 0; - buf3[KYBER_SYMBYTES] = 0; - buf3[KYBER_SYMBYTES+1] = 1; - } - - state = _shake128_absorb4x_34(state, buf0[0:34], buf1[0:34], buf2[0:34], buf3[0:34]); - - if ( USE_SQUEEZE_N == 1 ) { - - state, buf0, buf1, buf2, buf3 = __shake128_squeezenblocks4x(state, buf0, buf1, buf2, buf3); - - if (USE_AVX2_REJECTION == 1) { - tmp, rr[0*KYBER_VECN+0*KYBER_N:KYBER_N] = _rej_uniform_avx(rr[0*KYBER_VECN+0*KYBER_N:KYBER_N], buf0); - ctr0 = tmp; - tmp, rr[0*KYBER_VECN+1*KYBER_N:KYBER_N] = _rej_uniform_avx(rr[0*KYBER_VECN+1*KYBER_N:KYBER_N], buf1); - ctr1 = tmp; - tmp, rr[0*KYBER_VECN+2*KYBER_N:KYBER_N] = _rej_uniform_avx(rr[0*KYBER_VECN+2*KYBER_N:KYBER_N], buf2); - ctr2 = tmp; - ctr3, rr[1*KYBER_VECN+0*KYBER_N:KYBER_N] = _rej_uniform_avx(rr[1*KYBER_VECN+0*KYBER_N:KYBER_N], buf3); - } else { - tmp, rr[0*KYBER_VECN+0*KYBER_N:KYBER_N] = _rej_uniformn(rr[0*KYBER_VECN+0*KYBER_N:KYBER_N], buf0); - ctr0 = tmp; - tmp, rr[0*KYBER_VECN+1*KYBER_N:KYBER_N] = _rej_uniformn(rr[0*KYBER_VECN+1*KYBER_N:KYBER_N], buf1); - ctr1 = tmp; - tmp, rr[0*KYBER_VECN+2*KYBER_N:KYBER_N] = _rej_uniformn(rr[0*KYBER_VECN+2*KYBER_N:KYBER_N], buf2); - ctr2 = tmp; - ctr3, rr[1*KYBER_VECN+0*KYBER_N:KYBER_N] = _rej_uniformn(rr[1*KYBER_VECN+0*KYBER_N:KYBER_N], buf3); - } - - _, cf, _, _, zf = #CMP_64(ctr0, KYBER_N - 1); - flg0 = #SETcc(cf || zf); //SETBE - - _, cf, _, _, zf = #CMP_64(ctr1, KYBER_N - 1); - flg1 = #SETcc(cf || zf); - - _, _, _, _, _, bflg = #OR_8(flg0, flg1); - - _, cf, _, _, zf = #CMP_64(ctr2, KYBER_N - 1); - flg0 = #SETcc(cf || zf); - - _, cf, _, _, zf = #CMP_64(ctr3, KYBER_N - 1); - flg1 = #SETcc(cf || zf); - - _, _, _, _, _, flg0 = #OR_8(flg0, flg1); - _, _, _, _, _, bflg = #OR_8(flg0, bflg); - - } - else - { - ctr0 = 0; - ctr1 = 0; - ctr2 = 0; - ctr3 = 0; - flg0 = 1; - bflg = 1; - } - - while(bflg != 0) { - state, buf0[0:SHAKE128_RATE], buf1[0:SHAKE128_RATE], buf2[0:SHAKE128_RATE], buf3[0:SHAKE128_RATE] = __shake128_squeezeblock4x(state, buf0[0:SHAKE128_RATE], buf1[0:SHAKE128_RATE], buf2[0:SHAKE128_RATE], buf3[0:SHAKE128_RATE]); - - ctr0, rr[0*KYBER_VECN+0*KYBER_N:KYBER_N] = __rej_uniform(rr[0*KYBER_VECN+0*KYBER_N:KYBER_N], ctr0, buf0[0:SHAKE128_RATE], SHAKE128_RATE); - ctr1, rr[0*KYBER_VECN+1*KYBER_N:KYBER_N] = __rej_uniform(rr[0*KYBER_VECN+1*KYBER_N:KYBER_N], ctr1, buf1[0:SHAKE128_RATE], SHAKE128_RATE); - ctr2, rr[0*KYBER_VECN+2*KYBER_N:KYBER_N] = __rej_uniform(rr[0*KYBER_VECN+2*KYBER_N:KYBER_N], ctr2, buf2[0:SHAKE128_RATE], SHAKE128_RATE); - ctr3, rr[1*KYBER_VECN+0*KYBER_N:KYBER_N] = __rej_uniform(rr[1*KYBER_VECN+0*KYBER_N:KYBER_N], ctr3, buf3[0:SHAKE128_RATE], SHAKE128_RATE); - - _, cf, _, _, zf = #CMP_64(ctr0, KYBER_N - 1); - flg0 = #SETcc(cf || zf); - - _, cf, _, _, zf = #CMP_64(ctr1, KYBER_N - 1); - flg1 = #SETcc(cf || zf); - - _, _, _, _, _, bflg = #OR_8(flg0, flg1); - - _, cf, _, _, zf = #CMP_64(ctr2, KYBER_N - 1); - flg0 = #SETcc(cf || zf); - - _, cf, _, _, zf = #CMP_64(ctr3, KYBER_N - 1); - flg1 = #SETcc(cf || zf); - - _, _, _, _, _, flg0 = #OR_8(flg0, flg1); - _, _, _, _, _, bflg = #OR_8(flg0, bflg); - } - - f = __s2r(fs); - buf0[u256 0] = f; - buf1[u256 0] = f; - buf2[u256 0] = f; - buf3[u256 0] = f; - fs = __r2s(f); - - if(transposed == 1) - { - buf0[KYBER_SYMBYTES] = 1; - buf0[KYBER_SYMBYTES+1] = 1; - buf1[KYBER_SYMBYTES] = 1; - buf1[KYBER_SYMBYTES+1] = 2; - buf2[KYBER_SYMBYTES] = 2; - buf2[KYBER_SYMBYTES+1] = 0; - buf3[KYBER_SYMBYTES] = 2; - buf3[KYBER_SYMBYTES+1] = 1; - } - else - { - buf0[KYBER_SYMBYTES] = 1; - buf0[KYBER_SYMBYTES+1] = 1; - buf1[KYBER_SYMBYTES] = 2; - buf1[KYBER_SYMBYTES+1] = 1; - buf2[KYBER_SYMBYTES] = 0; - buf2[KYBER_SYMBYTES+1] = 2; - buf3[KYBER_SYMBYTES] = 1; - buf3[KYBER_SYMBYTES+1] = 2; - } - - state = _shake128_absorb4x_34(state, buf0[0:34], buf1[0:34], buf2[0:34], buf3[0:34]); - - if ( USE_SQUEEZE_N == 1 ) { - - state, buf0, buf1, buf2, buf3 = __shake128_squeezenblocks4x(state, buf0, buf1, buf2, buf3); - - if (USE_AVX2_REJECTION == 1) { - tmp, rr[1*KYBER_VECN+1*KYBER_N:KYBER_N] = _rej_uniform_avx(rr[1*KYBER_VECN+1*KYBER_N:KYBER_N], buf0); - ctr0 = tmp; - tmp, rr[1*KYBER_VECN+2*KYBER_N:KYBER_N] = _rej_uniform_avx(rr[1*KYBER_VECN+2*KYBER_N:KYBER_N], buf1); - ctr1 = tmp; - tmp, rr[2*KYBER_VECN+0*KYBER_N:KYBER_N] = _rej_uniform_avx(rr[2*KYBER_VECN+0*KYBER_N:KYBER_N], buf2); - ctr2 = tmp; - ctr3, rr[2*KYBER_VECN+1*KYBER_N:KYBER_N] = _rej_uniform_avx(rr[2*KYBER_VECN+1*KYBER_N:KYBER_N], buf3); - } else { - tmp, rr[1*KYBER_VECN+1*KYBER_N:KYBER_N] = _rej_uniformn(rr[1*KYBER_VECN+1*KYBER_N:KYBER_N], buf0); - ctr0 = tmp; - tmp, rr[1*KYBER_VECN+2*KYBER_N:KYBER_N] = _rej_uniformn(rr[1*KYBER_VECN+2*KYBER_N:KYBER_N], buf1); - ctr1 = tmp; - tmp, rr[2*KYBER_VECN+0*KYBER_N:KYBER_N] = _rej_uniformn(rr[2*KYBER_VECN+0*KYBER_N:KYBER_N], buf2); - ctr2 = tmp; - ctr3, rr[2*KYBER_VECN+1*KYBER_N:KYBER_N] = _rej_uniformn(rr[2*KYBER_VECN+1*KYBER_N:KYBER_N], buf3); - } - - _, cf, _, _, zf = #CMP_64(ctr0, KYBER_N - 1); - flg0 = #SETcc(cf || zf); - - _, cf, _, _, zf = #CMP_64(ctr1, KYBER_N - 1); - flg1 = #SETcc(cf || zf); - - _, _, _, _, _, bflg = #OR_8(flg0, flg1); - - _, cf, _, _, zf = #CMP_64(ctr2, KYBER_N - 1); - flg0 = #SETcc(cf || zf); - - _, cf, _, _, zf = #CMP_64(ctr3, KYBER_N - 1); - flg1 = #SETcc(cf || zf); - - _, _, _, _, _, flg0 = #OR_8(flg0, flg1); - _, _, _, _, _, bflg = #OR_8(flg0, bflg); - - } else { - ctr0 = 0; - ctr1 = 0; - ctr2 = 0; - ctr3 = 0; - flg0 = 1; - bflg = 1; - } - - - - while(bflg != 0) { - state, buf0[0:SHAKE128_RATE], buf1[0:SHAKE128_RATE], buf2[0:SHAKE128_RATE], buf3[0:SHAKE128_RATE] = __shake128_squeezeblock4x(state, buf0[0:SHAKE128_RATE], buf1[0:SHAKE128_RATE], buf2[0:SHAKE128_RATE], buf3[0:SHAKE128_RATE]); - - ctr0, rr[1*KYBER_VECN+1*KYBER_N:KYBER_N] = __rej_uniform(rr[1*KYBER_VECN+1*KYBER_N:KYBER_N], ctr0, buf0[0:SHAKE128_RATE], SHAKE128_RATE); - ctr1, rr[1*KYBER_VECN+2*KYBER_N:KYBER_N] = __rej_uniform(rr[1*KYBER_VECN+2*KYBER_N:KYBER_N], ctr1, buf1[0:SHAKE128_RATE], SHAKE128_RATE); - ctr2, rr[2*KYBER_VECN+0*KYBER_N:KYBER_N] = __rej_uniform(rr[2*KYBER_VECN+0*KYBER_N:KYBER_N], ctr2, buf2[0:SHAKE128_RATE], SHAKE128_RATE); - ctr3, rr[2*KYBER_VECN+1*KYBER_N:KYBER_N] = __rej_uniform(rr[2*KYBER_VECN+1*KYBER_N:KYBER_N], ctr3, buf3[0:SHAKE128_RATE], SHAKE128_RATE); - - _, cf, _, _, zf = #CMP_64(ctr0, KYBER_N - 1); - flg0 = #SETcc(cf || zf); - - _, cf, _, _, zf = #CMP_64(ctr1, KYBER_N - 1); - flg1 = #SETcc(cf || zf); - - _, _, _, _, _, bflg = #OR_8(flg0, flg1); - - _, cf, _, _, zf = #CMP_64(ctr2, KYBER_N - 1); - flg0 = #SETcc(cf || zf); - - _, cf, _, _, zf = #CMP_64(ctr3, KYBER_N - 1); - flg1 = #SETcc(cf || zf); - - _, _, _, _, _, flg0 = #OR_8(flg0, flg1); - _, _, _, _, _, bflg = #OR_8(flg0, bflg); - } - - f = __s2r(fs); - buf0[u256 0] = f; - buf0[KYBER_SYMBYTES] = 2; - buf0[KYBER_SYMBYTES+1] = 2; - - state[u64 0:25] = _shake128_absorb34(state[u64 0:25], buf0[0:34]); - - if ( USE_SQUEEZE_N == 1 ) { - - state[u64 0:25], buf0 = __shake128_squeezenblocks(state[u64 0:25], buf0); - - if (USE_AVX2_REJECTION == 1) { - ctr0, rr[2*KYBER_VECN+2*KYBER_N:KYBER_N] = _rej_uniform_avx(rr[2*KYBER_VECN+2*KYBER_N:KYBER_N], buf0); - } else { - ctr0, rr[2*KYBER_VECN+2*KYBER_N:KYBER_N] = _rej_uniformn(rr[2*KYBER_VECN+2*KYBER_N:KYBER_N], buf0); - } - - _, cf, _, _, zf = #CMP_64(ctr0, KYBER_N - 1); - bflg = #SETcc(cf || zf); - - } else { - ctr0 = 0; - bflg = 1; - } - - while(bflg != 0) { - ctr0_s = ctr0; - state[u64 0:25], buf0[0:SHAKE128_RATE] = _shake128_squeezeblock(state[u64 0:25], buf0[0:SHAKE128_RATE]); - ctr0 = ctr0_s; - - ctr0, rr[2*KYBER_VECN+2*KYBER_N:KYBER_N] = __rej_uniform(rr[2*KYBER_VECN+2*KYBER_N:KYBER_N], ctr0, buf0[0:SHAKE128_RATE], SHAKE128_RATE); - - _, cf, _, _, zf = #CMP_64(ctr0, KYBER_N - 1); - bflg = #SETcc(cf || zf); - } - - for i = 0 to KYBER_K - { - for j = 0 to KYBER_K - { - rr[i*KYBER_VECN+j*KYBER_N:KYBER_N] = _nttunpack(rr[i*KYBER_VECN+j*KYBER_N:KYBER_N]); - } - } - - return rr; -} diff --git a/code/jasmin/avx2v/gen_matrix_old.jinc b/code/jasmin/avx2v/gen_matrix_old.jinc deleted file mode 100644 index 184ac209..00000000 --- a/code/jasmin/avx2v/gen_matrix_old.jinc +++ /dev/null @@ -1,129 +0,0 @@ -require "params.jinc" -require "consts.jinc" -require "shuffle.jinc" -require "fips202.jinc" - -param int GENMATRIX_NBLOCKS = 3; -param int REJ_UNIFORM_BUFLEN = GENMATRIX_NBLOCKS * SHAKE128_RATE; - -inline -fn __rej_uniform_old(stack u16[KYBER_N] rp, reg u64 offset, stack u8[REJ_UNIFORM_BUFLEN] buf, inline int buflen) -> reg u64, stack u16[KYBER_N] -{ - reg u16 val0 val1; - reg u16 t; - reg u64 pos ctr; - reg u8 fl1 fl2; - reg bool cf zf b; - - ctr = offset; - pos = 0; - - _, cf, _, _, zf = #CMP_64(ctr, KYBER_N - 1); - fl1 = #SETcc(cf || zf); //SETBE - - _, cf, _, _, zf = #CMP_64(pos, buflen - 3); - fl2 = #SETcc(cf || zf); //SETBE - - _, _, _, _, b = #TEST_8(fl1, fl2); - - while(!b) - { - val0 = (16u)buf[(int)pos]; - pos += 1; - - t = (16u)buf[(int)pos]; - val1 = t; - val1 >>= 4; - - t &= 0x0F; - t <<= 8; - val0 |= t; - pos += 1; - - t = (16u)buf[(int)pos]; - t <<= 4; - val1 |= t; - pos += 1; - - if(val0 < KYBER_Q) - { - rp[(int)ctr] = val0; - ctr += 1; - } - - if(ctr < KYBER_N) - { - if(val1 < KYBER_Q) - { - rp[(int)ctr] = val1; - ctr += 1; - } - } - - _, cf, _, _, zf = #CMP_64(ctr, KYBER_N - 1); - fl1 = #SETcc(cf || zf); //SETBE - - _, cf, _, _, zf = #CMP_64(pos, buflen - 3); - fl2 = #SETcc(cf || zf); //SETBE - - _, _, _, _, b = #TEST_8(fl1, fl2); - } - - return ctr, rp; -} - -inline -fn __gen_matrix_old(stack u8[KYBER_SYMBYTES] seed, inline int transposed) -> stack u16[KYBER_K*KYBER_VECN] -{ - stack u8[34] extseed; - stack u8[REJ_UNIFORM_BUFLEN] buf; - stack u8[REJ_UNIFORM_BUFLEN] buf; - stack u8[REJ_UNIFORM_BUFLEN] buf; - stack u8[REJ_UNIFORM_BUFLEN] buf; - stack u64[25] state; - stack u16[KYBER_K*KYBER_VECN] rr; - - reg u64 t64; - stack u64 t64_s; - inline int i, j, k; - - for j = 0 to 4 - { - t64 = seed[u64 j]; - extseed[u64 j] = t64; - } - - for i = 0 to KYBER_K - { - for j = 0 to KYBER_K - { - if(transposed == 0) - { - extseed[KYBER_SYMBYTES] = j; - extseed[KYBER_SYMBYTES+1] = i; - } - else - { - extseed[KYBER_SYMBYTES] = i; - extseed[KYBER_SYMBYTES+1] = j; - } - - state = _shake128_absorb34(state, extseed); - - state, buf = __shake128_squeezenblocks(state, buf); - t64 = 0; - t64, rr[i*KYBER_VECN+j*KYBER_N:KYBER_N] = __rej_uniform_old(rr[i*KYBER_VECN+j*KYBER_N:KYBER_N], t64, buf, REJ_UNIFORM_BUFLEN); - - while (t64 < KYBER_N) - { - t64_s = t64; - state, buf[0:SHAKE128_RATE] = _shake128_squeezeblock(state, buf[0:SHAKE128_RATE]); - t64 = t64_s; - t64, rr[i*KYBER_VECN+j*KYBER_N:KYBER_N] = __rej_uniform_old(rr[i*KYBER_VECN+j*KYBER_N:KYBER_N], t64, buf, SHAKE128_RATE); - } - rr[i*KYBER_VECN+j*KYBER_N:KYBER_N] = _nttunpack(rr[i*KYBER_VECN+j*KYBER_N:KYBER_N]); - } - } - - return rr; -} diff --git a/code/jasmin/avx2v/indcpa.c b/code/jasmin/avx2v/indcpa.c deleted file mode 100644 index df90f7d8..00000000 --- a/code/jasmin/avx2v/indcpa.c +++ /dev/null @@ -1,320 +0,0 @@ -#include -#include "indcpa.h" -#include "poly.h" -#include "polyvec.h" -#include "ntt.h" -#include "symmetric.h" - -/************************************************* -* Name: pack_pk -* -* Description: Serialize the public key as concatenation of the -* serialized vector of polynomials pk -* and the public seed used to generate the matrix A. -* -* Arguments: unsigned char *r: pointer to the output serialized public key -* const poly *pk: pointer to the input public-key polynomial -* const unsigned char *seed: pointer to the input public seed -**************************************************/ -static void pack_pk(unsigned char *r, polyvec *pk, const unsigned char *seed) -{ - int i; - polyvec_tobytes(r, pk); - for(i=0;i> 4) | ((uint16_t)buf[pos+1] << 4)); - pos += 2; - - if(val1 < KYBER_Q) - { - r[ctr++] = (int16_t)val1; - } - - if(val2 < KYBER_Q && ctr < len) { - r[ctr++] = (int16_t)val2; - } - } - - return ctr; -} - -#define gen_a(A,B) gen_matrix(A,B,0) -#define gen_at(A,B) gen_matrix(A,B,1) - -/************************************************* -* Name: gen_matrix -* -* Description: Deterministically generate matrix A (or the transpose of A) -* from a seed. Entries of the matrix are polynomials that look -* uniformly random. Performs rejection sampling on output of -* a XOF -* -* Arguments: - polyvec *a: pointer to ouptput matrix A -* - const unsigned char *seed: pointer to input seed -* - int transposed: boolean deciding whether A or A^T is generated -**************************************************/ -static void gen_matrix(polyvec *a, const unsigned char *seed, int transposed) // Not static for benchmarking -{ - unsigned int ctr, i, j; - const unsigned int maxnblocks=(530+XOF_BLOCKBYTES)/XOF_BLOCKBYTES; /* 530 is expected number of required bytes */ - unsigned char buf[XOF_BLOCKBYTES*maxnblocks+1]; - xof_state state; - - for(i=0;i - -void indcpa_keypair(unsigned char *pk, - unsigned char *sk, - const unsigned char *randomness); - -void indcpa_enc(unsigned char *c, - const unsigned char *m, - const unsigned char *pk, - const unsigned char *coins); - -void indcpa_dec(unsigned char *m, - const unsigned char *c, - const unsigned char *sk); - - - - -void indcpa_keypair_jazz(unsigned char *pk, - unsigned char *sk, - const unsigned char *randomness); - -void indcpa_enc_jazz(unsigned char *c, - const unsigned char *m, - const unsigned char *pk, - const unsigned char *coins); - -void indcpa_dec_jazz(unsigned char *m, - const unsigned char *c, - const unsigned char *sk); - - -#endif diff --git a/code/jasmin/avx2v/indcpa.jinc b/code/jasmin/avx2v/indcpa.jinc deleted file mode 100644 index d804e06a..00000000 --- a/code/jasmin/avx2v/indcpa.jinc +++ /dev/null @@ -1,245 +0,0 @@ -require "params.jinc" -require "poly.jinc" -require "polyvec.jinc" -require "gen_matrix.jinc" - -inline -fn __indcpa_keypair(reg u64 pkp, reg u64 skp, reg ptr u8[KYBER_SYMBYTES] randomnessp) -{ - stack u64 spkp sskp; - stack u16[KYBER_K*KYBER_VECN] aa; - stack u16[KYBER_VECN] e pkpv skpv; - stack u8[64] buf; - stack u8[KYBER_SYMBYTES] publicseed noiseseed; - stack u8[32] inbuf; - reg u64 t64; - reg u8 nonce; - inline int i; - - spkp = pkp; - sskp = skp; - - for i=0 to KYBER_SYMBYTES/8 - { - t64 = randomnessp[u64 i]; - inbuf[u64 i] = t64; - } - - buf = _sha3_512_32(buf, inbuf); - - for i=0 to KYBER_SYMBYTES/8 - { - t64 = buf[u64 i]; - publicseed[u64 i] = t64; - t64 = buf[u64 i + KYBER_SYMBYTES/8]; - noiseseed[u64 i] = t64; - } - - aa = __gen_matrix(publicseed, 0); - - nonce = 0; - skpv[0:KYBER_N], skpv[KYBER_N:KYBER_N], skpv[2*KYBER_N:KYBER_N], e[0:KYBER_N] = _poly_getnoise_eta1_4x(skpv[0:KYBER_N], skpv[KYBER_N:KYBER_N], skpv[2*KYBER_N:KYBER_N], e[0:KYBER_N], noiseseed, nonce); - - nonce = 4; - e[KYBER_N:KYBER_N], e[2*KYBER_N:KYBER_N], pkpv[0:KYBER_N], pkpv[KYBER_N:KYBER_N] = _poly_getnoise_eta1_4x(e[KYBER_N:KYBER_N], e[2*KYBER_N:KYBER_N], pkpv[0:KYBER_N], pkpv[KYBER_N:KYBER_N], noiseseed, nonce); - - skpv = __polyvec_ntt(skpv); - e = __polyvec_ntt(e); - - - for i=0 to KYBER_K - { - pkpv[i*KYBER_N:KYBER_N] = __polyvec_pointwise_acc(pkpv[i*KYBER_N:KYBER_N], aa[i*KYBER_VECN:KYBER_VECN], skpv); - pkpv[i*KYBER_N:KYBER_N] = _poly_frommont(pkpv[i*KYBER_N:KYBER_N]); - } - - pkpv = __polyvec_add2(pkpv, e); - pkpv = __polyvec_reduce(pkpv); - - pkp = spkp; - skp = sskp; - - __polyvec_tobytes(skp, skpv); - __polyvec_tobytes(pkp, pkpv); - - pkp += KYBER_POLYVECBYTES; - - for i=0 to KYBER_SYMBYTES/8 - { - t64 = publicseed[u64 i]; - (u64)[pkp] = t64; - pkp += 8; - } -} - -inline -fn __indcpa_enc_0(stack u64 sctp, reg ptr u8[KYBER_INDCPA_MSGBYTES] msgp, reg u64 pkp, reg ptr u8[KYBER_SYMBYTES] noiseseed) -{ - stack u16[KYBER_VECN] pkpv sp ep bp; - stack u16[KYBER_K*KYBER_VECN] aat; - stack u16[KYBER_N] k epp v; - stack u8[KYBER_SYMBYTES] publicseed; - stack ptr u8[KYBER_SYMBYTES] s_noiseseed; - reg ptr u8[KYBER_SYMBYTES] lnoiseseed; - reg u64 i t64 ctp; - reg u8 nonce; - inline int w; - - pkpv = __polyvec_frombytes(pkp); - - i = 0; - pkp += KYBER_POLYVECBYTES; - while (i < KYBER_SYMBYTES/8) - { - t64 = (u64)[pkp]; - publicseed.[u64 8 * (int)i] = t64; - pkp += 8; - i += 1; - } - - k = _poly_frommsg_1(k, msgp); - - s_noiseseed = noiseseed; - aat = __gen_matrix(publicseed, 1); - lnoiseseed = s_noiseseed; - - nonce = 0; - sp[0:KYBER_N], sp[KYBER_N:KYBER_N], sp[2*KYBER_N:KYBER_N], ep[0:KYBER_N] = _poly_getnoise_eta1_4x(sp[0:KYBER_N], sp[KYBER_N:KYBER_N], sp[2*KYBER_N:KYBER_N], ep[0:KYBER_N], lnoiseseed, nonce); - - nonce = 4; - ep[KYBER_N:KYBER_N], ep[2*KYBER_N:KYBER_N], epp, bp[0:KYBER_N] = _poly_getnoise_eta1_4x(ep[KYBER_N:KYBER_N], ep[2*KYBER_N:KYBER_N], epp, bp[0:KYBER_N], lnoiseseed, nonce); - - sp = __polyvec_ntt(sp); - - for w=0 to KYBER_K - { - bp[w*KYBER_N:KYBER_N] = __polyvec_pointwise_acc(bp[w*KYBER_N:KYBER_N], aat[w*KYBER_VECN:KYBER_VECN], sp); - } - - v = __polyvec_pointwise_acc(v, pkpv, sp); - - bp = __polyvec_invntt(bp); - v = _poly_invntt(v); - - bp = __polyvec_add2(bp, ep); - v = _poly_add2(v, epp); - v = _poly_add2(v, k); - bp = __polyvec_reduce(bp); - v = __poly_reduce(v); - - ctp = sctp; - __polyvec_compress(ctp, bp); - ctp += KYBER_POLYVECCOMPRESSEDBYTES; - v = _poly_compress(ctp, v); -} - -inline -fn __indcpa_enc_1(reg ptr u8[KYBER_INDCPA_CIPHERTEXTBYTES] ctp, reg ptr u8[KYBER_INDCPA_MSGBYTES] msgp, reg u64 pkp, reg ptr u8[KYBER_SYMBYTES] noiseseed) -> reg ptr u8[KYBER_INDCPA_CIPHERTEXTBYTES] -{ - stack u16[KYBER_VECN] pkpv sp ep bp; - stack u16[KYBER_K*KYBER_VECN] aat; - stack u16[KYBER_N] k epp v; - stack u8[KYBER_SYMBYTES] publicseed; - stack ptr u8[KYBER_SYMBYTES] s_noiseseed; - reg ptr u8[KYBER_SYMBYTES] lnoiseseed; - stack ptr u8[KYBER_INDCPA_CIPHERTEXTBYTES] sctp; - reg u64 i t64; - reg u8 nonce; - inline int w; - - sctp = ctp; - - pkpv = __polyvec_frombytes(pkp); - - i = 0; - pkp += KYBER_POLYVECBYTES; - while (i < KYBER_SYMBYTES/8) - { - t64 = (u64)[pkp]; - publicseed.[u64 8*(int)i] = t64; - pkp += 8; - i += 1; - } - - k = _poly_frommsg_1(k, msgp); - - s_noiseseed = noiseseed; - aat = __gen_matrix(publicseed, 1); - lnoiseseed = s_noiseseed; - - nonce = 0; - sp[0:KYBER_N], sp[KYBER_N:KYBER_N], sp[2*KYBER_N:KYBER_N], ep[0:KYBER_N] = _poly_getnoise_eta1_4x(sp[0:KYBER_N], sp[KYBER_N:KYBER_N], sp[2*KYBER_N:KYBER_N], ep[0:KYBER_N], lnoiseseed, nonce); - - nonce = 4; - ep[KYBER_N:KYBER_N], ep[2*KYBER_N:KYBER_N], epp, bp[0:KYBER_N] = _poly_getnoise_eta1_4x(ep[KYBER_N:KYBER_N], ep[2*KYBER_N:KYBER_N], epp, bp[0:KYBER_N], lnoiseseed, nonce); - - sp = __polyvec_ntt(sp); - - for w=0 to KYBER_K - { - bp[w*KYBER_N:KYBER_N] = __polyvec_pointwise_acc(bp[w*KYBER_N:KYBER_N], aat[w*KYBER_VECN:KYBER_VECN], sp); - } - - v = __polyvec_pointwise_acc(v, pkpv, sp); - - bp = __polyvec_invntt(bp); - v = _poly_invntt(v); - - bp = __polyvec_add2(bp, ep); - v = _poly_add2(v, epp); - v = _poly_add2(v, k); - bp = __polyvec_reduce(bp); - v = __poly_reduce(v); - - ctp = sctp; - ctp[0:KYBER_POLYVECCOMPRESSEDBYTES] = __polyvec_compress_1(ctp[0:KYBER_POLYVECCOMPRESSEDBYTES], bp); - ctp[KYBER_POLYVECCOMPRESSEDBYTES:KYBER_POLYCOMPRESSEDBYTES], v = _poly_compress_1(ctp[KYBER_POLYVECCOMPRESSEDBYTES:KYBER_POLYCOMPRESSEDBYTES], v); - - return ctp; -} - -inline -fn __indcpa_dec_0(reg u64 msgp, reg u64 ctp, reg u64 skp) -{ - stack u16[KYBER_N] t v mp; - stack u16[KYBER_VECN] bp skpv; - - bp = __polyvec_decompress(ctp); - ctp += KYBER_POLYVECCOMPRESSEDBYTES; - v = _poly_decompress(v, ctp); - - skpv = __polyvec_frombytes(skp); - - bp = __polyvec_ntt(bp); - t = __polyvec_pointwise_acc(t, skpv, bp); - t = _poly_invntt(t); - - mp = _poly_sub(mp, v, t); - mp = __poly_reduce(mp); - - mp = _poly_tomsg(msgp, mp); -} - -inline -fn __indcpa_dec_1(reg ptr u8[KYBER_INDCPA_MSGBYTES] msgp, reg u64 ctp, reg u64 skp) -> reg ptr u8[KYBER_INDCPA_MSGBYTES] -{ - stack u16[KYBER_N] t v mp; - stack u16[KYBER_VECN] bp skpv; - - bp = __polyvec_decompress(ctp); - ctp += KYBER_POLYVECCOMPRESSEDBYTES; - v = _poly_decompress(v, ctp); - - skpv = __polyvec_frombytes(skp); - - bp = __polyvec_ntt(bp); - t = __polyvec_pointwise_acc(t, skpv, bp); - t = _poly_invntt(t); - - mp = _poly_sub(mp, v, t); - mp = __poly_reduce(mp); - - msgp, mp = _poly_tomsg_1(msgp, mp); - - return msgp; -} diff --git a/code/jasmin/avx2v/jbench.sh b/code/jasmin/avx2v/jbench.sh deleted file mode 100755 index c7b01104..00000000 --- a/code/jasmin/avx2v/jbench.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/bash -#exec compile.bench - -ulimit -s 50000 - -rm compile.bench - -for arg in -until_typing -until_cstexp -until_inline -until_rmfunc -until_unroll -until_splitting -until_valloc -until_vallocd -until_vshare -until_vshared -until_arrexp -until_rmarrinit -until_rmglobals -until_arrexp -until_makeref -until_lowering -until_stkalloc -until_ralloc -until_rallocd -until_linear -until_asm; do - - echo "=====================================================" >> compile.bench - echo "===== Benchmark with flag $arg" >> compile.bench - echo "=====================================================" >> compile.bench - - make clean - export JADDFLAGS=$arg - #(time make jindcpa.s) 2>compile.bench - (time make jpolyvec.s 2>&1) 2>>compile.bench -done - - diff --git a/code/jasmin/avx2v/jfips202.jazz b/code/jasmin/avx2v/jfips202.jazz deleted file mode 100644 index 7735695c..00000000 --- a/code/jasmin/avx2v/jfips202.jazz +++ /dev/null @@ -1,102 +0,0 @@ -require "fips202.jinc" - -export fn shake256_128_33_jazz(reg u64 outp inp) -{ - stack u8[33] in; - stack u8[128] out; - stack u64 soutp; - reg u8 c; - inline int i; - - for i = 0 to 33 { - c = (u8)[inp + i]; - in[i] = c; - } - - soutp = outp; - out = _shake256_128_33(out, in); - outp = soutp; - - for i = 0 to 128 { - c = out[i]; - (u8)[outp + i] = c; - } -} - -export fn sha3_512_32_jazz(reg u64 outp inp) -{ - stack u8[32] in; - stack u8[64] out; - stack u64 soutp; - reg u8 c; - inline int i; - - for i = 0 to 32 { - c = (u8)[inp + i]; - in[i] = c; - } - - soutp = outp; - out = _sha3_512_32(out, in); - outp = soutp; - for i = 0 to 64 { - c = out[i]; - (u8)[outp + i] = c; - } -} - - -export fn shake128_absorb34_jazz(reg u64 statep, reg u64 inp) -{ - stack u64[25] state; - stack u8[34] in; - reg u8 c; - reg u64 t; - inline int i; - - for i = 0 to 34 { - c = (u8)[inp + i]; - in[i] = c; - } - - state = _shake128_absorb34(state, in); - - for i = 0 to 25 { - t = state[i]; - [statep + 8*i] = t; - } -} - -export fn shake128_squeezeblock_jazz(reg u64 outp, reg u64 statep) -{ - stack u64[25] state; - stack u8[SHAKE128_RATE] out; - reg u8 c; - reg u64 t; - inline int i; - stack u64 soutp; - stack u64 sstatep; - - for i = 0 to 25 { - t = [statep + 8*i]; - state[i] = t; - } - - soutp = outp; - sstatep = statep; - - state, out = _shake128_squeezeblock(state, out); - - outp = soutp; - statep = sstatep; - - for i = 0 to 25 { - t = state[i]; - [statep + 8*i] = t; - } - - for i = 0 to SHAKE128_RATE { - c = out[i]; - (u8)[outp + i] = c; - } -} diff --git a/code/jasmin/avx2v/jindcpa.jazz b/code/jasmin/avx2v/jindcpa.jazz deleted file mode 100644 index 7c6d20c5..00000000 --- a/code/jasmin/avx2v/jindcpa.jazz +++ /dev/null @@ -1,94 +0,0 @@ -require "indcpa.jinc" -require "params.jinc" -require "poly.jinc" -require "polyvec.jinc" -require "gen_matrix.jinc" - - -export fn indcpa_keypair_jazz(reg u64 pkp, reg u64 skp, reg u64 coins) -{ - stack u8[KYBER_SYMBYTES] randomness; - reg ptr u8[KYBER_SYMBYTES] randomnessp; - inline int i; - - randomnessp = randomness; - for i = 0 to KYBER_SYMBYTES { - randomnessp[i] = (u8)[coins + i]; - } - __indcpa_keypair(pkp, skp, randomnessp); -} - - -export fn indcpa_enc_jazz(reg u64 ctp, reg u64 msgp, reg u64 pkp, reg u64 coinsp) -{ - stack u16[KYBER_VECN] pkpv sp ep bp; - stack u16[KYBER_K*KYBER_VECN] aat; - stack u16[KYBER_N] k epp v; - stack u8[KYBER_SYMBYTES] publicseed; - stack u8[KYBER_SYMBYTES] noiseseed; - reg u64 i; - reg u8 c nonce; - stack u64 sctp; - - sctp = ctp; - - i = 0; - while (i < KYBER_SYMBYTES) - { - c = (u8)[coinsp+i]; - noiseseed[(int)i] = c; - i += 1; - } - - pkpv = __polyvec_frombytes(pkp); - - i = 0; - pkp += KYBER_POLYVECBYTES; - while (i < KYBER_SYMBYTES) - { - c = (u8)[pkp]; - publicseed[(int)i] = c; - pkp += 1; - i += 1; - } - - k = _poly_frommsg(k, msgp); - - aat = __gen_matrix(publicseed, 1); - - nonce = 0; - sp[0:KYBER_N], sp[KYBER_N:KYBER_N], sp[2*KYBER_N:KYBER_N], ep[0:KYBER_N] = _poly_getnoise_eta1_4x(sp[0:KYBER_N], sp[KYBER_N:KYBER_N], sp[2*KYBER_N:KYBER_N], ep[0:KYBER_N], noiseseed, nonce); - - nonce = 4; - ep[KYBER_N:KYBER_N], ep[2*KYBER_N:KYBER_N], epp, bp[0:KYBER_N] = _poly_getnoise_eta1_4x(ep[KYBER_N:KYBER_N], ep[2*KYBER_N:KYBER_N], epp, bp[0:KYBER_N], noiseseed, nonce); - - - sp = __polyvec_ntt(sp); - - bp[0:KYBER_N] = __polyvec_pointwise_acc(bp[0:KYBER_N], aat[0:KYBER_VECN], sp); - bp[KYBER_N:KYBER_N]= __polyvec_pointwise_acc(bp[KYBER_N:KYBER_N], aat[KYBER_VECN:KYBER_VECN], sp); - bp[2*KYBER_N:KYBER_N] = __polyvec_pointwise_acc(bp[2*KYBER_N:KYBER_N], aat[2*KYBER_VECN:KYBER_VECN], sp); - - v = __polyvec_pointwise_acc(v, pkpv, sp); - - bp = __polyvec_invntt(bp); - v = _poly_invntt(v); - - bp = __polyvec_add2(bp, ep); - v = _poly_add2(v, epp); - v = _poly_add2(v, k); - bp = __polyvec_reduce(bp); - v = __poly_reduce(v); - - ctp = sctp; - __polyvec_compress(ctp, bp); - ctp += KYBER_POLYVECCOMPRESSEDBYTES; - v = _poly_compress(ctp, v); -} - - - -export fn indcpa_dec_jazz(reg u64 msgp, reg u64 ctp, reg u64 skp) -{ - __indcpa_dec_0(msgp, ctp, skp); -} diff --git a/code/jasmin/avx2v/jkem.jazz b/code/jasmin/avx2v/jkem.jazz deleted file mode 100644 index 7519e83c..00000000 --- a/code/jasmin/avx2v/jkem.jazz +++ /dev/null @@ -1,87 +0,0 @@ -require "kem.jinc" - -export fn jade_kem_kyber_kyber768_amd64_avx2v_keypair_derand(reg u64 public_key secret_key coins) -> reg u64 -{ - reg u64 r; - stack u8[KYBER_SYMBYTES*2] randomness; - reg ptr u8[KYBER_SYMBYTES*2] randomnessp; - inline int i; - - public_key = public_key; - secret_key = secret_key; - - for i = 0 to KYBER_SYMBYTES*2 - { - randomness[i] = (u8)[coins + i]; - } - - randomnessp = randomness; - - __crypto_kem_keypair_jazz(public_key, secret_key, randomnessp); - ?{}, r = #set0(); - return r; -} - -export fn jade_kem_kyber_kyber768_amd64_avx2v_enc_derand(reg u64 ciphertext shared_secret public_key coins) -> reg u64 -{ - reg u64 r; - stack u8[KYBER_SYMBYTES] randomness; - reg ptr u8[KYBER_SYMBYTES] randomnessp; - inline int i; - - ciphertext = ciphertext; - shared_secret = shared_secret; - public_key = public_key; - - for i = 0 to KYBER_SYMBYTES - { - randomness[i] = (u8)[coins + i]; - } - - randomnessp = randomness; - - __crypto_kem_enc_jazz(ciphertext, shared_secret, public_key, randomnessp); - ?{}, r = #set0(); - return r; -} - -export fn jade_kem_kyber_kyber768_amd64_avx2v_keypair(reg u64 public_key secret_key) -> reg u64 -{ - reg u64 r; - stack u8[KYBER_SYMBYTES*2] randomness; - reg ptr u8[KYBER_SYMBYTES*2] randomnessp; - - public_key = public_key; - secret_key = secret_key; - - randomnessp = randomness; - randomnessp = #randombytes(randomnessp); - __crypto_kem_keypair_jazz(public_key, secret_key, randomnessp); - ?{}, r = #set0(); - return r; -} - -export fn jade_kem_kyber_kyber768_amd64_avx2v_enc(reg u64 ciphertext shared_secret public_key) -> reg u64 -{ - reg u64 r; - stack u8[KYBER_SYMBYTES] randomness; - reg ptr u8[KYBER_SYMBYTES] randomnessp; - - ciphertext = ciphertext; - shared_secret = shared_secret; - public_key = public_key; - - randomnessp = randomness; - randomnessp = #randombytes(randomnessp); - __crypto_kem_enc_jazz(ciphertext, shared_secret, public_key, randomnessp); - ?{}, r = #set0(); - return r; -} - -export fn jade_kem_kyber_kyber768_amd64_avx2v_dec(reg u64 shared_secret ciphertext secret_key) -> reg u64 -{ - reg u64 r; - __crypto_kem_dec_jazz(shared_secret, ciphertext, secret_key); - ?{}, r = #set0(); - return r; -} diff --git a/code/jasmin/avx2v/jpoly.jazz b/code/jasmin/avx2v/jpoly.jazz deleted file mode 100644 index 4a352883..00000000 --- a/code/jasmin/avx2v/jpoly.jazz +++ /dev/null @@ -1,316 +0,0 @@ -require "params.jinc" -require "poly.jinc" - -/* These exported functions are just for unit testing */ - -export fn poly_compress_jazz(reg u64 rp, reg u64 ap) -{ - stack u16[KYBER_N] a; - reg u16 t; - inline int i; - - for i = 0 to KYBER_N { - t = (u16)[ap + 2*i]; - a[i] = t; - } - - a = _poly_compress(rp, a); -} - -export fn poly_decompress_jazz(reg u64 rp, reg u64 ap) -{ - stack u16[KYBER_N] r; - reg u16 t; - inline int i; - - r = _poly_decompress(r, ap); - - for i = 0 to KYBER_N { - t = r[i]; - (u16)[rp + 2*i] = t; - } -} - -export fn poly_tobytes_jazz(reg u64 rp, reg u64 ap) -{ - stack u16[KYBER_N] a; - reg u16 t; - inline int i; - - for i = 0 to KYBER_N { - t = (u16)[ap + 2*i]; - a[i] = t; - } - - a = _nttunpack(a); - a = _poly_tobytes(rp, a); -} - -export fn poly_frombytes_jazz(reg u64 rp, reg u64 ap) -{ - stack u16[KYBER_N] r; - reg u16 t; - inline int i; - - r = _poly_frombytes(r, ap); - - r = _nttpack(r); - - for i = 0 to KYBER_N { - t = r[i]; - (u16)[rp + 2*i] = t; - } -} - -export fn poly_tomsg_jazz(reg u64 rp, reg u64 ap) -{ - stack u16[KYBER_N] a; - reg u16 t; - inline int i; - - for i = 0 to KYBER_N { - t = (u16)[ap + 2*i]; - a[i] = t; - } - - a = _poly_tomsg(rp, a); -} - -export fn poly_frommsg_jazz(reg u64 rp, reg u64 ap) -{ - stack u16[KYBER_N] r; - reg u16 t; - inline int i; - - r = _poly_frommsg(r, ap); - - for i = 0 to KYBER_N { - t = r[i]; - (u16)[rp + 2*i] = t; - } -} - - -export fn poly_add2_jazz(reg u64 rp, reg u64 bp) -{ - stack u16[KYBER_N] r; - stack u16[KYBER_N] b; - reg u16 t; - inline int i; - - for i = 0 to KYBER_N { - t = (u16)[rp + 2*i]; - r[i] = t; - t = (u16)[bp + 2*i]; - b[i] = t; - } - - r = _poly_add2(r, b); - - for i = 0 to KYBER_N { - t = r[i]; - (u16)[rp + 2*i] = t; - } -} - -export fn poly_sub_jazz(reg u64 rp, reg u64 ap, reg u64 bp) -{ - stack u16[KYBER_N] a; - stack u16[KYBER_N] b; - stack u16[KYBER_N] r; - reg u16 t; - inline int i; - - for i = 0 to KYBER_N { - t = (u16)[ap + 2*i]; - a[i] = t; - t = (u16)[bp + 2*i]; - b[i] = t; - } - - r = _poly_sub(r, a, b); - - for i = 0 to KYBER_N { - t = r[i]; - (u16)[rp + 2*i] = t; - } -} - -export fn poly_ntt_jazz(reg u64 rp) -{ - stack u16[KYBER_N] r; - reg u16 t; - inline int i; - - for i = 0 to KYBER_N { - t = (u16)[rp + 2*i]; - r[i] = t; - } - - r = _poly_ntt(r); - - r = _nttpack(r); - - for i = 0 to KYBER_N { - t = r[i]; - (u16)[rp + 2*i] = t; - } -} - -export fn poly_invntt_jazz(reg u64 rp) -{ - stack u16[KYBER_N] r; - reg u16 t; - inline int i; - - for i = 0 to KYBER_N { - t = (u16)[rp + 2*i]; - r[i] = t; - } - - r = _nttunpack(r); - - r = _poly_invntt(r); - - for i = 0 to KYBER_N { - t = r[i]; - (u16)[rp + 2*i] = t; - } -} - -export fn poly_basemul_jazz(reg u64 rp, reg u64 ap, reg u64 bp) -{ - stack u16[KYBER_N] a; - stack u16[KYBER_N] b; - stack u16[KYBER_N] r; - reg u16 t; - inline int i; - - for i = 0 to KYBER_N { - t = (u16)[ap + 2*i]; - a[i] = t; - t = (u16)[bp + 2*i]; - b[i] = t; - t = (u16)[rp + 2*i]; - r[i] = t; - } - - a = _nttunpack(a); - b = _nttunpack(b); - r = _poly_basemul(r, a, b); - r = _nttpack(r); - - for i = 0 to KYBER_N { - t = r[i]; - (u16)[rp + 2*i] = t; - } -} - -export fn poly_frommont_jazz(reg u64 rp) -{ - stack u16[KYBER_N] r; - reg u16 t; - inline int i; - - for i = 0 to KYBER_N { - t = (u16)[rp + 2*i]; - r[i] = t; - } - - r = _poly_frommont(r); - - for i = 0 to KYBER_N { - t = r[i]; - (u16)[rp + 2*i] = t; - } -} - - -export fn poly_getnoise_eta1_4x_jazz(reg u64 rp, reg u64 seedp, reg u8 nonce) -{ - stack u16[4 * KYBER_N] r; - stack u8[KYBER_SYMBYTES] seed; - stack u64 srp; - reg u16 t; - reg u8 d; - inline int i; - - srp = rp; - - for i = 0 to KYBER_SYMBYTES { - d = (u8)[seedp + i]; - seed[i] = d; - } - - r[0:KYBER_N], r[KYBER_N:KYBER_N], r[2*KYBER_N:KYBER_N], r[3*KYBER_N:KYBER_N] = _poly_getnoise_eta1_4x(r[0:KYBER_N], r[KYBER_N:KYBER_N], r[2*KYBER_N:KYBER_N], r[3*KYBER_N:KYBER_N], seed, nonce); - - rp = srp; - for i = 0 to 4*KYBER_N { - t = r[i]; - (u16)[rp + 2*i] = t; - } -} - -export fn poly_getnoise_eta1122_4x_jazz(reg u64 rp, reg u64 seedp, reg u8 nonce) -{ - stack u16[4 * KYBER_N] r; - stack u8[KYBER_SYMBYTES] seed; - stack u64 srp; - reg u16 t; - reg u8 d; - inline int i; - - srp = rp; - - for i = 0 to KYBER_SYMBYTES { - d = (u8)[seedp + i]; - seed[i] = d; - } - - r[0:KYBER_N], r[KYBER_N:KYBER_N], r[2*KYBER_N:KYBER_N], r[3*KYBER_N:KYBER_N] = _poly_getnoise_eta1122_4x(r[0:KYBER_N], r[KYBER_N:KYBER_N], r[2*KYBER_N:KYBER_N], r[3*KYBER_N:KYBER_N], seed, nonce); - - rp = srp; - for i = 0 to 4*KYBER_N { - t = r[i]; - (u16)[rp + 2*i] = t; - } -} - - -export fn poly_reduce_jazz(reg u64 rp) -{ - stack u16[KYBER_N] r; - reg u16 t; - inline int i; - - for i = 0 to KYBER_N { - t = (u16)[rp + 2*i]; - r[i] = t; - } - - r = __poly_reduce(r); - - for i = 0 to KYBER_N { - t = r[i]; - (u16)[rp + 2*i] = t; - } -} - -export fn poly_csubq_jazz(reg u64 rp) -{ - stack u16[KYBER_N] r; - reg u16 t; - inline int i; - - for i = 0 to KYBER_N { - t = (u16)[rp + 2*i]; - r[i] = t; - } - - r = _poly_csubq(r); - - for i = 0 to KYBER_N { - t = r[i]; - (u16)[rp + 2*i] = t; - } -} diff --git a/code/jasmin/avx2v/jpolyvec.jazz b/code/jasmin/avx2v/jpolyvec.jazz deleted file mode 100644 index 4407b7e6..00000000 --- a/code/jasmin/avx2v/jpolyvec.jazz +++ /dev/null @@ -1,211 +0,0 @@ -require "params.jinc" -require "polyvec.jinc" - -/* These exported functions are just for unit testing */ - -export fn polyvec_tobytes_jazz(reg u64 rp, reg u64 ap) -{ - stack u16[KYBER_VECN] a; - reg u16 t; - inline int i; - - for i = 0 to KYBER_VECN { - t = (u16)[ap + 2*i]; - a[i] = t; - } - - a[0:KYBER_N] = _nttunpack(a[0:KYBER_N]); - a[KYBER_N:KYBER_N] = _nttunpack(a[KYBER_N:KYBER_N]); - a[2*KYBER_N:KYBER_N] = _nttunpack(a[2*KYBER_N:KYBER_N]); - - __polyvec_tobytes(rp, a); -} - - -export fn polyvec_decompress_jazz(reg u64 rp, reg u64 ap) -{ - stack u16[KYBER_VECN] r; - reg u16 t; - inline int i; - - r = __polyvec_decompress(ap); - - for i = 0 to KYBER_VECN { - t = r[i]; - (u16)[rp + 2*i] = t; - } -} - - -export fn polyvec_compress_jazz(reg u64 rp, reg u64 ap) -{ - stack u16[KYBER_VECN] a; - reg u16 t; - inline int i; - - for i = 0 to KYBER_VECN { - t = (u16)[ap + 2*i]; - a[i] = t; - } - - __polyvec_compress(rp, a); -} - - -export fn polyvec_frombytes_jazz(reg u64 rp, reg u64 ap) -{ - stack u16[KYBER_VECN] r; - reg u16 t; - inline int i; - - r = __polyvec_frombytes(ap); - - r[0:KYBER_N] = _nttpack(r[0:KYBER_N]); - r[KYBER_N:KYBER_N] = _nttpack(r[KYBER_N:KYBER_N]); - r[2*KYBER_N:KYBER_N] = _nttpack(r[2*KYBER_N:KYBER_N]); - - for i = 0 to KYBER_VECN { - t = r[i]; - (u16)[rp + 2*i] = t; - } -} - -export fn polyvec_add2_jazz(reg u64 rp, reg u64 bp) -{ - stack u16[KYBER_VECN] a; - stack u16[KYBER_VECN] b; - stack u16[KYBER_VECN] r; - reg u16 t; - inline int i; - - for i = 0 to KYBER_VECN { - t = (u16)[rp + 2*i]; - a[i] = t; - t = (u16)[bp + 2*i]; - b[i] = t; - } - - r = __polyvec_add2(a, b); - - for i = 0 to KYBER_VECN { - t = r[i]; - (u16)[rp + 2*i] = t; - } -} - -export fn polyvec_pointwise_acc_jazz(reg u64 rp, reg u64 ap, reg u64 bp) -{ - stack u16[KYBER_VECN] a; - stack u16[KYBER_VECN] b; - stack u16[KYBER_N] r; - reg u16 t; - inline int i; - - for i = 0 to KYBER_VECN { - t = (u16)[ap + 2*i]; - a[i] = t; - t = (u16)[bp + 2*i]; - b[i] = t; - } - - a[0:KYBER_N] = _nttunpack(a[0:KYBER_N]); - a[KYBER_N:KYBER_N] = _nttunpack(a[KYBER_N:KYBER_N]); - a[2*KYBER_N:KYBER_N] = _nttunpack(a[2*KYBER_N:KYBER_N]); - - b[0:KYBER_N] = _nttunpack(b[0:KYBER_N]); - b[KYBER_N:KYBER_N] = _nttunpack(b[KYBER_N:KYBER_N]); - b[2*KYBER_N:KYBER_N] = _nttunpack(b[2*KYBER_N:KYBER_N]); - - r = __polyvec_pointwise_acc(r, a, b); - - r = _nttpack(r); - - for i = 0 to KYBER_N { - t = r[i]; - (u16)[rp + 2*i] = t; - } -} - -export fn polyvec_ntt_jazz(reg u64 rp) -{ - stack u16[KYBER_VECN] r; - reg u16 t; - inline int i; - - for i = 0 to KYBER_VECN { - t = (u16)[rp + 2*i]; - r[i] = t; - } - - r = __polyvec_ntt(r); - - r[0:KYBER_N] = _nttpack(r[0:KYBER_N]); - r[KYBER_N:KYBER_N] = _nttpack(r[KYBER_N:KYBER_N]); - r[2*KYBER_N:KYBER_N] = _nttpack(r[2*KYBER_N:KYBER_N]); - - for i = 0 to KYBER_VECN { - t = r[i]; - (u16)[rp + 2*i] = t; - } -} - -export fn polyvec_invntt_jazz(reg u64 rp) -{ - stack u16[KYBER_VECN] r; - reg u16 t; - inline int i; - - for i = 0 to KYBER_VECN { - t = (u16)[rp + 2*i]; - r[i] = t; - } - - r[0:KYBER_N] = _nttunpack(r[0:KYBER_N]); - r[KYBER_N:KYBER_N] = _nttunpack(r[KYBER_N:KYBER_N]); - r[2*KYBER_N:KYBER_N] = _nttunpack(r[2*KYBER_N:KYBER_N]); - - r = __polyvec_invntt(r); - - for i = 0 to KYBER_VECN { - t = r[i]; - (u16)[rp + 2*i] = t; - } -} - -export fn polyvec_csubq_jazz(reg u64 rp) -{ - stack u16[KYBER_VECN] r; - reg u16 t; - inline int i; - - for i = 0 to KYBER_VECN { - t = (u16)[rp + 2*i]; - r[i] = t; - } - - r = __polyvec_csubq(r); - - for i = 0 to KYBER_VECN { - t = r[i]; - (u16)[rp + 2*i] = t; - } -} - -export fn polyvec_reduce_jazz(reg u64 rp) -{ - stack u16[KYBER_VECN] r; - reg u16 t; - inline int i; - - for i = 0 to KYBER_VECN { - t = (u16)[rp + 2*i]; - r[i] = t; - } - - r = __polyvec_reduce(r); - - for i = 0 to KYBER_VECN { - t = r[i]; - (u16)[rp + 2*i] = t; - } -} diff --git a/code/jasmin/avx2v/jspeed.jazz b/code/jasmin/avx2v/jspeed.jazz deleted file mode 100644 index 45ff728d..00000000 --- a/code/jasmin/avx2v/jspeed.jazz +++ /dev/null @@ -1,197 +0,0 @@ -require "poly.jinc" -require "polyvec.jinc" -require "gen_matrix.jinc" -require "indcpa.jinc" -require "kem.jinc" -require "verify.jinc" - -/* Exported functions only for benchmarking */ -export fn gen_matrix_jazz(reg u64 ap, reg u64 seedp) -{ - stack u16[KYBER_K*KYBER_VECN] aa; - stack u8[KYBER_SYMBYTES] seed; - - aa = __gen_matrix(seed, 1); -} - -export fn poly_compress_jazz(reg u64 rp, reg u64 ap) -{ - stack u16[KYBER_N] a; - - a = _poly_compress(rp, a); -} - -export fn poly_decompress_jazz(reg u64 rp, reg u64 ap) -{ - stack u16[KYBER_N] r; - - r = _poly_decompress(r, ap); -} - -export fn poly_tomsg_jazz(reg u64 rp, reg u64 ap) -{ - stack u16[KYBER_N] a; - - a = _poly_tomsg(rp, a); -} - -export fn poly_frommsg_jazz(reg u64 rp, reg u64 ap) -{ - stack u16[KYBER_N] r; - - r = _poly_frommsg(r, ap); -} - -export fn poly_ntt_jazz(reg u64 rp) -{ - stack u16[KYBER_N] r; - - r = _poly_ntt(r); -} - -export fn poly_invntt_jazz(reg u64 rp) -{ - stack u16[KYBER_N] r; - - r = _poly_invntt(r); -} - - -export fn poly_getnoise_jazz(reg u64 rp, reg u64 seedp, reg u8 nonce) -{ - stack u16[KYBER_N] r; - stack u8[KYBER_SYMBYTES] seed; - - //r = _poly_getnoise_eta1_4x(r, seed, nonce); -} - - -export fn poly_getnoise_4x_jazz(reg u64 r0 r1 r2 r3, reg u64 seedp, reg u8 nonce) -{ - stack u16[KYBER_N] r0 r1 r2 r3; - stack u8[KYBER_SYMBYTES] seed; - - r0, r1, r2, r3 = _poly_getnoise_eta1_4x(r0, r1, r2, r3, seed, nonce); -} - - - -export fn polyvec_decompress_jazz(reg u64 rp, reg u64 ap) -{ - stack u16[KYBER_VECN] r; - - r = __polyvec_decompress(ap); -} - - -export fn polyvec_compress_jazz(reg u64 rp, reg u64 ap) -{ - stack u16[KYBER_VECN] a; - - __polyvec_compress(rp, a); -} - - -export fn polyvec_pointwise_acc_jazz(reg u64 rp, reg u64 ap, reg u64 bp) -{ - stack u16[KYBER_VECN] a; - stack u16[KYBER_VECN] b; - stack u16[KYBER_N] r; - - r = __polyvec_pointwise_acc(r, a, b); -} - - -export fn indcpa_keypair_jazz(reg u64 pkp, reg u64 skp, reg u64 randomnessp) -{ - //__indcpa_keypair(pkp, skp, randomnessp); -} - - -export fn indcpa_enc_jazz(reg u64 ctp, reg u64 msgp, reg u64 pkp, reg u64 coinsp) -{ - stack u16[KYBER_VECN] pkpv sp ep bp; - stack u16[KYBER_K*KYBER_VECN] aat; - stack u16[KYBER_N] k epp v; - stack u8[KYBER_SYMBYTES] publicseed; - stack u8[KYBER_SYMBYTES] noiseseed; - reg u64 i; - reg u8 c nonce; - stack u64 sctp; - - sctp = ctp; - - i = 0; - while (i < KYBER_SYMBYTES) - { - c = (u8)[coinsp+i]; - noiseseed[(int)i] = c; - i += 1; - } - - pkpv = __polyvec_frombytes(pkp); - - i = 0; - pkp += KYBER_POLYVECBYTES; - while (i < KYBER_SYMBYTES) - { - c = (u8)[pkp]; - publicseed[(int)i] = c; - pkp += 1; - i += 1; - } - - k = _poly_frommsg(k, msgp); - - aat = __gen_matrix(publicseed, 1); - - nonce = 0; - sp[0:KYBER_N], sp[KYBER_N:KYBER_N], sp[2*KYBER_N:KYBER_N], ep[0:KYBER_N] = _poly_getnoise_eta1_4x(sp[0:KYBER_N], sp[KYBER_N:KYBER_N], sp[2*KYBER_N:KYBER_N], ep[0:KYBER_N], noiseseed, nonce); - - nonce = 4; - ep[KYBER_N:KYBER_N], ep[2*KYBER_N:KYBER_N], epp, bp[0:KYBER_N] = _poly_getnoise_eta1_4x(ep[KYBER_N:KYBER_N], ep[2*KYBER_N:KYBER_N], epp, bp[0:KYBER_N], noiseseed, nonce); - - sp = __polyvec_ntt(sp); - - bp[0:KYBER_N] = __polyvec_pointwise_acc(bp[0:KYBER_N], aat[0:KYBER_VECN], sp); - bp[KYBER_N:KYBER_N]= __polyvec_pointwise_acc(bp[KYBER_N:KYBER_N], aat[KYBER_VECN:KYBER_VECN], sp); - bp[2*KYBER_N:KYBER_N] = __polyvec_pointwise_acc(bp[2*KYBER_N:KYBER_N], aat[2*KYBER_VECN:KYBER_VECN], sp); - - v = __polyvec_pointwise_acc(v, pkpv, sp); - - bp = __polyvec_invntt(bp); - v = _poly_invntt(v); - - bp = __polyvec_add2(bp, ep); - v = _poly_add2(v, epp); - v = _poly_add2(v, k); - bp = __polyvec_reduce(bp); - v = __poly_reduce(v); - - ctp = sctp; - __polyvec_compress(ctp, bp); - ctp += KYBER_POLYVECCOMPRESSEDBYTES; - v = _poly_compress(ctp, v); -} - - -export fn indcpa_dec_jazz(reg u64 msgp, reg u64 ctp, reg u64 skp) -{ - __indcpa_dec_0(msgp, ctp, skp); -} - -export fn crypto_kem_keypair_jazz(reg u64 pkp, reg u64 skp, reg u64 randomnessp) -{ - //__crypto_kem_keypair_jazz(pkp, skp, randomnessp); -} - - -export fn crypto_kem_enc_jazz(reg u64 ctp, reg u64 shkp, reg u64 pkp, reg u64 randomnessp) -{ - //__crypto_kem_enc_jazz(ctp, shkp, pkp, randomnessp); -} - -export fn crypto_kem_dec_jazz(reg u64 shkp, reg u64 ctp, reg u64 skp) -{ - __crypto_kem_dec_jazz(shkp, ctp, skp); -} diff --git a/code/jasmin/avx2v/keccakf1600.jinc b/code/jasmin/avx2v/keccakf1600.jinc deleted file mode 100644 index 02996b6a..00000000 --- a/code/jasmin/avx2v/keccakf1600.jinc +++ /dev/null @@ -1,194 +0,0 @@ -u64[24] KECCAK_RC = -{ 0x0000000000000001 - ,0x0000000000008082 - ,0x800000000000808a - ,0x8000000080008000 - ,0x000000000000808b - ,0x0000000080000001 - ,0x8000000080008081 - ,0x8000000000008009 - ,0x000000000000008a - ,0x0000000000000088 - ,0x0000000080008009 - ,0x000000008000000a - ,0x000000008000808b - ,0x800000000000008b - ,0x8000000000008089 - ,0x8000000000008003 - ,0x8000000000008002 - ,0x8000000000000080 - ,0x000000000000800a - ,0x800000008000000a - ,0x8000000080008081 - ,0x8000000000008080 - ,0x0000000080000001 - ,0x8000000080008008 -}; - -inline fn __index(inline int x y) -> inline int -{ - inline int r; - r = (x % 5) + 5 * (y % 5); - return r; -} - -inline fn __keccak_rho_offsets(inline int i) -> inline int -{ - inline int r x y z t; - - r = 0; - x = 1; - y = 0; - - for t = 0 to 24 { - if (i == x + 5 * y) { - r = ((t + 1) * (t + 2) / 2) % 64; - } - z = (2 * x + 3 * y) % 5; - x = y; - y = z; - } - - return r; -} - -inline fn __rhotates(inline int x y) -> inline int -{ - inline int i r; - i = __index(x, y); - r = __keccak_rho_offsets(i); - return r; -} - -inline fn __theta_sum_scalar(reg ptr u64[25] a) -> reg u64[5] -{ - inline int i j ti; - reg u64[5] c; - - for i=0 to 5 - { - ti = __index(i, 0); - c[i] = a[ti]; - } - - for j=1 to 5 - { for i=0 to 5 - { - ti = __index(i, j); - c[i] ^= a[ti]; - } - } - - return c; -} - -inline fn __theta_rol_scalar(reg u64[5] c) -> reg u64[5] -{ - inline int i; - reg u64[5] d; - - for i = 0 to 5 - { d[i] = c[(i+1)%5]; - _, _, d[i] = #ROL_64(d[i], 1); - d[i] ^= c[(i+4)%5]; - } - - return d; -} - -inline fn __rol_sum_scalar( - reg u64[5] d, - reg ptr u64[25] a, - inline int offset -) -> reg u64[5] -{ - inline int j j1 k ti; - reg u64[5] c; - - for j = 0 to 5 - { - j1 = (j+offset) % 5; - k = __rhotates(j1, j); - ti = __index(j1, j); - c[j] = a[ti]; - c[j] ^= d[j1]; - _, _, c[j] = #ROL_64(c[j], k); - } - - return c; -} - -inline fn __set_row_scalar( - reg ptr u64[25] r, - inline int row, - reg u64[5] c, - reg u64 iota -) -> reg ptr u64[25] -{ - inline int j j1 j2 ti; - reg u64 t; - - for j= 0 to 5 - { - j1 = (j+1) % 5; - j2 = (j+2) % 5; - t = !c[j1] & c[j2]; - if row==0 && j==0 { t ^= iota; } - t ^= c[j]; - ti = __index(j, row); - r[ti] = t; - } - - return r; -} - -inline fn __round2x_scalar(reg ptr u64[25] a r, reg u64 iota) -> reg ptr u64[25], reg ptr u64[25] -{ - reg u64[5] c d; - - c = __theta_sum_scalar(a); - d = __theta_rol_scalar(c); - c = __rol_sum_scalar(d, a, 0); - r = __set_row_scalar(r, 0, c, iota); - c = __rol_sum_scalar(d, a, 3); - r = __set_row_scalar(r, 1, c, iota); - c = __rol_sum_scalar(d, a, 1); - r = __set_row_scalar(r, 2, c, iota); - c = __rol_sum_scalar(d, a, 4); - r = __set_row_scalar(r, 3, c, iota); - c = __rol_sum_scalar(d, a, 2); - r = __set_row_scalar(r, 4, c, iota); - - return a, r; -} - -#[returnaddress="stack"] -fn _keccakf1600_scalar(reg ptr u64[25] a) -> reg ptr u64[25] -{ - stack u64[25] r; - reg ptr u64[24] iotas_p; - reg u64 iota; - reg u64 round; - stack u64 round_s; - - iotas_p = KECCAK_RC; - - round = 0; - - while(round < 24) - { - iota = iotas_p[(int) round]; - round_s = round; - a, r = __round2x_scalar(a, r, iota); - round = round_s; - round += 1; - - iota = iotas_p[(int) round]; - round_s = round; - r, a = __round2x_scalar(r, a, iotas_p[(int) round]); - round = round_s; - round += 1; - } - - return a; -} diff --git a/code/jasmin/avx2v/kem.c b/code/jasmin/avx2v/kem.c deleted file mode 100644 index bc3471bc..00000000 --- a/code/jasmin/avx2v/kem.c +++ /dev/null @@ -1,141 +0,0 @@ -#include -#include -#include -#include "kem.h" -#include "indcpa.h" -#include "symmetric.h" - -/************************************************* -* Name: verify -* -* Description: Compare two arrays for equality in constant time. -* -* Arguments: const uint8_t *a: pointer to first byte array -* const uint8_t *b: pointer to second byte array -* size_t len: length of the byte arrays -* -* Returns 0 if the byte arrays are equal, 1 otherwise -**************************************************/ -uint64_t verify(const uint8_t *a, const uint8_t *b, size_t len) -{ - size_t i; - uint8_t r; - - r = 0; - for(i=0; i < len; i ++) - r |= a[i] ^ b[i]; - - return (-(uint64_t)r) >> 63; -} - -/************************************************* -* Name: cmov -* -* Description: Copy len bytes from x to r if b is 1; -* don't modify x if b is 0. Requires b to be in {0,1}; -* assumes two's complement representation of negative integers. -* Runs in constant time. -* -* Arguments: uint8_t *r: pointer to output byte array -* const uint8_t *x: pointer to input byte array -* size_t len: Amount of bytes to be copied -* uint8_t b: Condition bit; has to be in {0,1} -**************************************************/ -void cmov(uint8_t *r, const uint8_t *x, size_t len, uint8_t b) -{ - size_t i; - - b = -b; - for(i=0; i < len; i++) - r[i] ^= b & (r[i] ^ x[i]); -} - -/************************************************* -* Name: crypto_kem_keypair -* -* Description: Generates public and private key for the CCA-secure -* Kyber key encapsulation mechanism -* -* Arguments: - unsigned char *pk: pointer to output public key -* - unsigned char *sk: pointer to output private key -**************************************************/ -void crypto_kem_keypair(unsigned char *pk, - unsigned char *sk, - const unsigned char *randomness) -{ - indcpa_keypair(pk, sk, randomness); - - memcpy(sk+KYBER_INDCPA_SECRETKEYBYTES, pk, KYBER_INDCPA_PUBLICKEYBYTES); - - hash_h(sk+KYBER_SECRETKEYBYTES-2*KYBER_SYMBYTES, pk, KYBER_PUBLICKEYBYTES); - - memcpy(sk+KYBER_SECRETKEYBYTES-KYBER_SYMBYTES, randomness + KYBER_SYMBYTES, KYBER_SYMBYTES); -} - -/************************************************* -* Name: crypto_kem_enc -* -* Description: Generates cipher text and shared -* secret for given public key -* -* Arguments: - unsigned char *c: pointer to output ciphertext (of length KYBER_INDCPA_BYTES bytes) -* - const unsigned char *m: pointer to input message (of length KYBER_INDCPA_MSGBYTES bytes) -* - const unsigned char *pk: pointer to input public key (of length KYBER_INDCPA_PUBLICKEYBYTES bytes) -* - const unsigned char *coin: pointer to input random coins used as seed (of length KYBER_SYMBYTES bytes) -* to deterministically generate all randomness -**************************************************/ -void crypto_kem_enc(unsigned char *ct, - unsigned char *ss, - const unsigned char *pk, - const unsigned char *coins) -{ - uint8_t buf[2*KYBER_SYMBYTES]; - uint8_t kr[2*KYBER_SYMBYTES]; - - hash_h(buf, coins, KYBER_SYMBYTES); - - hash_h(buf + KYBER_SYMBYTES, pk, KYBER_PUBLICKEYBYTES); - hash_g(kr, buf, 2*KYBER_SYMBYTES); - - indcpa_enc(ct, buf, pk, kr+KYBER_SYMBYTES); - - hash_h(kr+KYBER_SYMBYTES, ct, KYBER_CIPHERTEXTBYTES); - - kdf(ss, kr, 2*KYBER_SYMBYTES); -} - -/************************************************* -* Name: crypto_kem_dec -* -* Description: Generates shared secret for given -* cipher text and private key -* -* Arguments: - unsigned char *m: pointer to output decrypted message (of length KYBER_INDCPA_MSGBYTES) -* - const unsigned char *c: pointer to input ciphertext (of length KYBER_INDCPA_BYTES) -* - const unsigned char *sk: pointer to input secret key (of length KYBER_INDCPA_SECRETKEYBYTES) -**************************************************/ -void crypto_kem_dec(uint8_t *ss, - const uint8_t *ct, - const uint8_t *sk) -{ - uint8_t buf[2*KYBER_SYMBYTES]; - uint8_t kr[2*KYBER_SYMBYTES]; - uint8_t cmp[KYBER_CIPHERTEXTBYTES]; - uint64_t cnd; - const uint8_t *pk = sk + KYBER_INDCPA_SECRETKEYBYTES; - - indcpa_dec(buf, ct, sk); - - memcpy(buf+KYBER_SYMBYTES, sk+KYBER_SECRETKEYBYTES-2*KYBER_SYMBYTES, KYBER_SYMBYTES); - hash_g(kr, buf, 2*KYBER_SYMBYTES); - - indcpa_enc(cmp, buf, pk, kr+KYBER_SYMBYTES); - - cnd = verify(ct, cmp, KYBER_CIPHERTEXTBYTES); - - hash_h(kr+KYBER_SYMBYTES, ct, KYBER_CIPHERTEXTBYTES); - - cmov(kr, sk+KYBER_SECRETKEYBYTES-KYBER_SYMBYTES, KYBER_SYMBYTES, cnd); - - kdf(ss, kr, 2*KYBER_SYMBYTES); -} diff --git a/code/jasmin/avx2v/kem.h b/code/jasmin/avx2v/kem.h deleted file mode 100644 index 7021f920..00000000 --- a/code/jasmin/avx2v/kem.h +++ /dev/null @@ -1,41 +0,0 @@ -#ifndef CRYPTO_KEM_H -#define CRYPTO_KEM_H - -#include - -void crypto_kem_keypair(unsigned char *pk, - unsigned char *sk, - const unsigned char *randomness); - -void crypto_kem_enc(unsigned char *c, - unsigned char *m, - const unsigned char *pk, - const unsigned char *coins); - -void crypto_kem_dec(unsigned char *m, - const unsigned char *c, - const unsigned char *sk); - - -void jade_kem_kyber_kyber768_amd64_avx2v_keypair_derand(unsigned char *pk, - unsigned char *sk, - const unsigned char *randomness); - -void jade_kem_kyber_kyber768_amd64_avx2v_enc_derand(unsigned char *c, - const unsigned char *m, - const unsigned char *pk, - const unsigned char *coins); - -void jade_kem_kyber_kyber768_amd64_avx2v_keypair(unsigned char *pk, - unsigned char *sk); - -void jade_kem_kyber_kyber768_amd64_avx2v_enc(unsigned char *c, - const unsigned char *m, - const unsigned char *pk); - -void jade_kem_kyber_kyber768_amd64_avx2v_dec(unsigned char *m, - const unsigned char *c, - const unsigned char *sk); - - -#endif diff --git a/code/jasmin/avx2v/kem.jinc b/code/jasmin/avx2v/kem.jinc deleted file mode 100644 index 8a95347d..00000000 --- a/code/jasmin/avx2v/kem.jinc +++ /dev/null @@ -1,144 +0,0 @@ -require "indcpa.jinc" -require "verify.jinc" - -inline -fn __crypto_kem_keypair_jazz(reg u64 pkp, reg u64 skp, reg ptr u8[KYBER_SYMBYTES*2] randomnessp) -{ - stack ptr u8[KYBER_SYMBYTES*2] s_randomnessp; - reg ptr u8[KYBER_SYMBYTES] randomnessp1 randomnessp2; - - stack u8[32] h_pk; - stack u64 s_skp s_pkp; - reg u64 t64; - inline int i; - - s_randomnessp = randomnessp; - s_pkp = pkp; - s_skp = skp; - - randomnessp1 = randomnessp[0:KYBER_SYMBYTES]; - __indcpa_keypair(pkp, skp, randomnessp1); - - skp = s_skp; - skp += KYBER_POLYVECBYTES; - pkp = s_pkp; - - for i=0 to KYBER_INDCPA_PUBLICKEYBYTES/8 - { - t64 = (u64)[pkp + 8*i]; - (u64)[skp] = t64; - skp += 8; - } - - s_skp = skp; - pkp = s_pkp; - t64 = KYBER_PUBLICKEYBYTES; - h_pk = _isha3_256(h_pk, pkp, t64); - skp = s_skp; - - for i=0 to 4 - { - t64 = h_pk[u64 i]; - (u64)[skp] = t64; - skp += 8; - } - - randomnessp = s_randomnessp; - randomnessp2 = randomnessp[KYBER_SYMBYTES:KYBER_SYMBYTES]; - for i=0 to KYBER_SYMBYTES/8 - { - t64 = randomnessp2[u64 i]; - (u64)[skp] = t64; - skp += 8; - } -} - -inline -fn __crypto_kem_enc_jazz(reg u64 ctp, reg u64 shkp, reg u64 pkp, reg ptr u8[KYBER_SYMBYTES] randomnessp) -{ - inline int i; - - stack u8[KYBER_SYMBYTES * 2] buf kr; - stack u64 s_pkp s_ctp s_shkp; - reg u64 t64; - - s_pkp = pkp; - s_ctp = ctp; - s_shkp = shkp; - - for i=0 to KYBER_SYMBYTES/8 - { - t64 = randomnessp[u64 i]; - kr[u64 i] = t64; - } - - t64 = KYBER_SYMBYTES; - buf[0:KYBER_SYMBYTES] = _isha3_256_32(buf[0:KYBER_SYMBYTES], kr[0:KYBER_SYMBYTES]); - - pkp = s_pkp; - - t64 = KYBER_PUBLICKEYBYTES; - buf[KYBER_SYMBYTES:KYBER_SYMBYTES] = _isha3_256(buf[KYBER_SYMBYTES:KYBER_SYMBYTES], pkp, t64); - - kr = _sha3_512_64(kr, buf); - - pkp = s_pkp; - - __indcpa_enc_0(s_ctp, buf[0:KYBER_INDCPA_MSGBYTES], pkp, kr[KYBER_SYMBYTES:KYBER_SYMBYTES]); - - ctp = s_ctp; - t64 = KYBER_INDCPA_CIPHERTEXTBYTES; - kr[KYBER_SYMBYTES:KYBER_SYMBYTES] = _isha3_256(kr[KYBER_SYMBYTES:KYBER_SYMBYTES], ctp, t64); - - shkp = s_shkp; - t64 = KYBER_SSBYTES; - _shake256_64(shkp, t64, kr); -} - -inline -fn __crypto_kem_dec_jazz(reg u64 shkp, reg u64 ctp, reg u64 skp) -{ - stack u8[KYBER_INDCPA_CIPHERTEXTBYTES] ctpc; - stack u8[2*KYBER_SYMBYTES] kr buf; - stack u64 s_skp s_ctp s_shkp; - reg u64 pkp hp zp t64 cnd; - inline int i; - - s_shkp = shkp; - s_ctp = ctp; - - buf[0:KYBER_INDCPA_MSGBYTES] = __indcpa_dec_1(buf[0:KYBER_INDCPA_MSGBYTES], ctp, skp); - - hp = skp + 32; - hp += 24 * KYBER_K * KYBER_N>>3; - - for i=0 to KYBER_SYMBYTES/8 - { - t64 = (u64)[hp + 8*i]; - buf.[u64 KYBER_SYMBYTES + 8*i] = t64; - } - - s_skp = skp; - - kr = _sha3_512_64(kr, buf); - - pkp = s_skp; - pkp += 12 * KYBER_K * KYBER_N>>3; - - ctpc = __indcpa_enc_1(ctpc, buf[0:KYBER_INDCPA_MSGBYTES], pkp, kr[KYBER_SYMBYTES:KYBER_SYMBYTES]); - - ctp = s_ctp; - cnd = __verify(ctp, ctpc); - - zp = s_skp; - zp += 64; - zp += 24 * KYBER_K * KYBER_N>>3; - kr[0:KYBER_SYMBYTES] = __cmov(kr[0:KYBER_SYMBYTES], zp, cnd); - - t64 = KYBER_INDCPA_CIPHERTEXTBYTES; - kr[KYBER_SYMBYTES:KYBER_SYMBYTES] = _isha3_256(kr[KYBER_SYMBYTES:KYBER_SYMBYTES], ctp, t64); - - shkp = s_shkp; - t64 = KYBER_SSBYTES; - _shake256_64(shkp, t64, kr); -} diff --git a/code/jasmin/avx2v/ntt.S b/code/jasmin/avx2v/ntt.S deleted file mode 100644 index 30f7c210..00000000 --- a/code/jasmin/avx2v/ntt.S +++ /dev/null @@ -1,198 +0,0 @@ -#include "consts.h" -.include "shuffle.inc" -.include "fq.inc" - -# We break the dependency chains with the cost of slightly more additions. -# But they can be run in parallel to the multiplications on execution port 5 -# (multiplications only go to ports 0 and 1) -.macro butterfly2 rl0,rl1,rl2,rl3,rh0,rh1,rh2,rh3,x=3,y=2,zl0=15,zl1=15,zh0=1,zh1=1 -#mul -vpmullw %ymm\zl0,%ymm\rh0,%ymm12 -vpmulhw %ymm\zh0,%ymm\rh0,%ymm\x -vpmullw %ymm\zl0,%ymm\rh1,%ymm13 -vpmulhw %ymm\zh0,%ymm\rh1,%ymm\rh0 -vpmullw %ymm\zl1,%ymm\rh2,%ymm14 -vpmulhw %ymm\zh1,%ymm\rh2,%ymm\y -vpmullw %ymm\zl1,%ymm\rh3,%ymm15 -vpmulhw %ymm\zh1,%ymm\rh3,%ymm\rh2 - -#reduce -vpmulhw %ymm0,%ymm12,%ymm12 -vpmulhw %ymm0,%ymm13,%ymm13 -vpmulhw %ymm0,%ymm14,%ymm14 -vpmulhw %ymm0,%ymm15,%ymm15 - -vpsubw %ymm\rh0,%ymm\rl1,%ymm\rh1 -vpaddw %ymm\rh0,%ymm\rl1,%ymm\rl1 -vpsubw %ymm\x,%ymm\rl0,%ymm\rh0 -vpaddw %ymm\x,%ymm\rl0,%ymm\rl0 -vpsubw %ymm\rh2,%ymm\rl3,%ymm\rh3 -vpaddw %ymm\rh2,%ymm\rl3,%ymm\rl3 -vpsubw %ymm\y,%ymm\rl2,%ymm\rh2 -vpaddw %ymm\y,%ymm\rl2,%ymm\rl2 - -#update -vpaddw %ymm12,%ymm\rh0,%ymm\rh0 -vpsubw %ymm12,%ymm\rl0,%ymm\rl0 -vpaddw %ymm13,%ymm\rh1,%ymm\rh1 -vpsubw %ymm13,%ymm\rl1,%ymm\rl1 -vpaddw %ymm14,%ymm\rh2,%ymm\rh2 -vpsubw %ymm14,%ymm\rl2,%ymm\rl2 -vpaddw %ymm15,%ymm\rh3,%ymm\rh3 -vpsubw %ymm15,%ymm\rl3,%ymm\rl3 -.endm - -.text -ntt_level0_avx: -level0: -#zetas -vpbroadcastd (%rsi),%ymm15 -vpbroadcastd 4(%rsi),%ymm1 - -#load -vmovdqa (%rdi),%ymm4 -vmovdqa 32(%rdi),%ymm5 -vmovdqa 64(%rdi),%ymm6 -vmovdqa 96(%rdi),%ymm7 -vmovdqa 256(%rdi),%ymm8 -vmovdqa 288(%rdi),%ymm9 -vmovdqa 320(%rdi),%ymm10 -vmovdqa 352(%rdi),%ymm11 - -butterfly2 4,5,6,7,8,9,10,11 - -#store -vmovdqa %ymm4,(%rdi) -vmovdqa %ymm5,32(%rdi) -vmovdqa %ymm6,64(%rdi) -vmovdqa %ymm7,96(%rdi) -vmovdqa %ymm8,256(%rdi) -vmovdqa %ymm9,288(%rdi) -vmovdqa %ymm10,320(%rdi) -vmovdqa %ymm11,352(%rdi) - -ret - -ntt_levels1t6_avx: -level1: -#zetas -vpbroadcastd (%rsi),%ymm15 -vpbroadcastd 4(%rsi),%ymm1 - -#load -vmovdqa (%rdi),%ymm4 -vmovdqa 32(%rdi),%ymm5 -vmovdqa 64(%rdi),%ymm6 -vmovdqa 96(%rdi),%ymm7 -vmovdqa 128(%rdi),%ymm8 -vmovdqa 160(%rdi),%ymm9 -vmovdqa 192(%rdi),%ymm10 -vmovdqa 224(%rdi),%ymm11 - -butterfly2 4,5,6,7,8,9,10,11,3 - -level2: -#zetas -vmovdqu 8(%rsi),%ymm15 -vmovdqu 40(%rsi),%ymm1 - -shuffle8 4,8,3,8 -shuffle8 5,9,4,9 -shuffle8 6,10,5,10 -shuffle8 7,11,6,11 - -butterfly2 3,8,4,9,5,10,6,11,7 - -level3: -#zetas -vmovdqu 72(%rsi),%ymm15 -vmovdqu 104(%rsi),%ymm1 - -shuffle4 3,5,7,5 -shuffle4 8,10,3,10 -shuffle4 4,6,8,6 -shuffle4 9,11,4,11 - -butterfly2 7,5,3,10,8,6,4,11,9 - -level4: -#zetas -vmovdqu 136(%rsi),%ymm15 -vmovdqu 168(%rsi),%ymm1 - -shuffle2 7,8,9,8 -shuffle2 5,6,7,6 -shuffle2 3,4,5,4 -shuffle2 10,11,3,11 - -butterfly2 9,8,7,6,5,4,3,11,10 - -level5: -#zetas -vmovdqu 200(%rsi),%ymm15 -vmovdqu 232(%rsi),%ymm1 - -shuffle1 9,5,10,5 -shuffle1 8,4,9,4 -shuffle1 7,3,8,3 -shuffle1 6,11,7,11 - -butterfly2 10,5,9,4,8,3,7,11,6 - -level6: -#zetas -vmovdqu 264(%rsi),%ymm14 -vmovdqu 328(%rsi),%ymm15 -vmovdqu 296(%rsi),%ymm1 -vmovdqu 360(%rsi),%ymm2 - -butterfly2 10,5,8,3,9,4,7,11,6,1,14,15,1,2 - -vmovdqa _16XV*2(%rdx),%ymm1 -red16 10,12 -red16 5,13 -red16 9,14 -red16 4,15 -red16 8,2 -red16 3,6 -red16 7,12 -red16 11,13 - -#store -vmovdqa %ymm10,(%rdi) -vmovdqa %ymm5,32(%rdi) -vmovdqa %ymm9,64(%rdi) -vmovdqa %ymm4,96(%rdi) -vmovdqa %ymm8,128(%rdi) -vmovdqa %ymm3,160(%rdi) -vmovdqa %ymm7,192(%rdi) -vmovdqa %ymm11,224(%rdi) - -ret - -.global cdecl(nttl0_avx) -cdecl(nttl0_avx): -#consts -vmovdqa _16XQ*2(%rsi),%ymm0 -mov %rsi,%rdx -add $_ZETAS_EXP*2,%rsi -call ntt_level0_avx -add $128,%rdi -call ntt_level0_avx -ret - - -.global cdecl(nttl1t6_avx) -cdecl(nttl1t6_avx): -#consts -vmovdqa _16XQ*2(%rsi),%ymm0 -mov %rsi,%rdx -add $_ZETAS_EXP*2,%rsi - -add $8,%rsi -call ntt_levels1t6_avx -add $256,%rdi -add $392,%rsi -call ntt_levels1t6_avx - -ret diff --git a/code/jasmin/avx2v/ntt.c b/code/jasmin/avx2v/ntt.c deleted file mode 100644 index b706bbcc..00000000 --- a/code/jasmin/avx2v/ntt.c +++ /dev/null @@ -1,152 +0,0 @@ -#include -#include "params.h" -#include "ntt.h" -#include "reduce.h" - -/* Code to generate zetas and zetas_inv used in the number-theoretic transform: - -#define KYBER_ROOT_OF_UNITY 17 - -static const uint16_t tree[128] = { - 0, 64, 32, 96, 16, 80, 48, 112, 8, 72, 40, 104, 24, 88, 56, 120, - 4, 68, 36, 100, 20, 84, 52, 116, 12, 76, 44, 108, 28, 92, 60, 124, - 2, 66, 34, 98, 18, 82, 50, 114, 10, 74, 42, 106, 26, 90, 58, 122, - 6, 70, 38, 102, 22, 86, 54, 118, 14, 78, 46, 110, 30, 94, 62, 126, - 1, 65, 33, 97, 17, 81, 49, 113, 9, 73, 41, 105, 25, 89, 57, 121, - 5, 69, 37, 101, 21, 85, 53, 117, 13, 77, 45, 109, 29, 93, 61, 125, - 3, 67, 35, 99, 19, 83, 51, 115, 11, 75, 43, 107, 27, 91, 59, 123, - 7, 71, 39, 103, 23, 87, 55, 119, 15, 79, 47, 111, 31, 95, 63, 127}; - - -static int16_t fqmul(int16_t a, int16_t b) { - return montgomery_reduce((int32_t)a*b); -} - -void init_ntt() { - unsigned int i, j, k; - int16_t tmp[128]; - - tmp[0] = MONT; - for(i = 1; i < 128; ++i) - tmp[i] = fqmul(tmp[i-1], KYBER_ROOT_OF_UNITY*MONT % KYBER_Q); - - for(i = 0; i < 128; ++i) - zetas[i] = tmp[tree[i]]; - - k = 0; - for(i = 64; i >= 1; i >>= 1) - for(j = i; j < 2*i; ++j) - zetas_inv[k++] = -tmp[128 - tree[j]]; - - zetas_inv[127] = MONT * (MONT * (KYBER_Q - 1) * ((KYBER_Q - 1)/128) % KYBER_Q) % KYBER_Q; -} - -*/ -int16_t zetas[128] = { - 2285, 2571, 2970, 1812, 1493, 1422, 287, 202, 3158, 622, 1577, 182, 962, 2127, 1855, 1468, - 573, 2004, 264, 383, 2500, 1458, 1727, 3199, 2648, 1017, 732, 608, 1787, 411, 3124, 1758, - 1223, 652, 2777, 1015, 2036, 1491, 3047, 1785, 516, 3321, 3009, 2663, 1711, 2167, 126, 1469, - 2476, 3239, 3058, 830, 107, 1908, 3082, 2378, 2931, 961, 1821, 2604, 448, 2264, 677, 2054, - 2226, 430, 555, 843, 2078, 871, 1550, 105, 422, 587, 177, 3094, 3038, 2869, 1574, 1653, - 3083, 778, 1159, 3182, 2552, 1483, 2727, 1119, 1739, 644, 2457, 349, 418, 329, 3173, 3254, - 817, 1097, 603, 610, 1322, 2044, 1864, 384, 2114, 3193, 1218, 1994, 2455, 220, 2142, 1670, - 2144, 1799, 2051, 794, 1819, 2475, 2459, 478, 3221, 3021, 996, 991, 958, 1869, 1522, 1628}; - -int16_t zetas_inv[128] = { - 1701, 1807, 1460, 2371, 2338, 2333, 308, 108, 2851, 870, 854, 1510, 2535, 1278, 1530, 1185, - 1659, 1187, 3109, 874, 1335, 2111, 136, 1215, 2945, 1465, 1285, 2007, 2719, 2726, 2232, 2512, - 75, 156, 3000, 2911, 2980, 872, 2685, 1590, 2210, 602, 1846, 777, 147, 2170, 2551, 246, - 1676, 1755, 460, 291, 235, 3152, 2742, 2907, 3224, 1779, 2458, 1251, 2486, 2774, 2899, 1103, - 1275, 2652, 1065, 2881, 725, 1508, 2368, 398, 951, 247, 1421, 3222, 2499, 271, 90, 853, - 1860, 3203, 1162, 1618, 666, 320, 8, 2813, 1544, 282, 1838, 1293, 2314, 552, 2677, 2106, - 1571, 205, 2918, 1542, 2721, 2597, 2312, 681, 130, 1602, 1871, 829, 2946, 3065, 1325, 2756, - 1861, 1474, 1202, 2367, 3147, 1752, 2707, 171, 3127, 3042, 1907, 1836, 1517, 359, 758, 1441}; - - -/************************************************* -* Name: fqmul -* -* Description: Multiplication followed by Montgomery reduction -* -* Arguments: - int16_t a: first factor -* - int16_t b: second factor -* -* Returns 16-bit integer congruent to a*b*R^{-1} mod q -**************************************************/ -static int16_t fqmul(int16_t a, int16_t b) { - return montgomery_reduce((int32_t)a*b); -} - -/************************************************* -* Name: ntt -* -* Description: Inplace number-theoretic transform (NTT) in Rq -* input is in standard order, output is in bitreversed order -* -* Arguments: - int16_t r[256]: pointer to input/output vector of elements of Zq -**************************************************/ -void ntt(int16_t r[256]) { - unsigned int len, start, j, k; - int16_t t, zeta; - - k = 1; - for(len = 128; len >= 2; len >>= 1) { - for(start = 0; start < 256; start = j + len) { - zeta = zetas[k++]; - for(j = start; j < start + len; ++j) { - t = fqmul(zeta, r[j + len]); - r[j + len] = r[j] - t; - r[j] = r[j] + t; - } - } - } -} - -/************************************************* -* Name: invntt -* -* Description: Inplace inverse number-theoretic transform in Rq -* input is in bitreversed order, output is in standard order -* -* Arguments: - int16_t r[256]: pointer to input/output vector of elements of Zq -**************************************************/ -void invntt(int16_t r[256]) { - unsigned int start, len, j, k; - int16_t t, zeta; - - k = 0; - for(len = 2; len <= 128; len <<= 1) { - for(start = 0; start < 256; start = j + len) { - zeta = zetas_inv[k++]; - for(j = start; j < start + len; ++j) { - t = r[j]; - r[j] = barrett_reduce(t + r[j + len]); - r[j + len] = t - r[j + len]; - r[j + len] = fqmul(zeta, r[j + len]); - } - } - } - - for(j = 0; j < 256; ++j) - r[j] = fqmul(r[j], zetas_inv[127]); -} - -/************************************************* -* Name: basemul -* -* Description: Multiplication of polynomials in Zq[X]/((X^2-zeta)) -* used for multiplication of elements in Rq in NTT domain -* -* Arguments: - int16_t r[2]: pointer to the output polynomial -* - const int16_t a[2]: pointer to the first factor -* - const int16_t b[2]: pointer to the second factor -* - int16_t zeta: integer defining the reduction polynomial -**************************************************/ -void basemul(int16_t r[2], const int16_t a[2], const int16_t b[2], int16_t zeta) { - r[0] = fqmul(a[1], b[1]); - r[0] = fqmul(r[0], zeta); - r[0] += fqmul(a[0], b[0]); - - r[1] = fqmul(a[0], b[1]); - r[1] += fqmul(a[1], b[0]); -} diff --git a/code/jasmin/avx2v/ntt.h b/code/jasmin/avx2v/ntt.h deleted file mode 100644 index 9d621d62..00000000 --- a/code/jasmin/avx2v/ntt.h +++ /dev/null @@ -1,45 +0,0 @@ -#ifndef NTT_H -#define NTT_H - -#include -#include "params.h" -#include "consts.h" - -extern int16_t zetas[128]; -extern int16_t zetas_inv[128]; - -void invntt(int16_t *poly); -void basemul(int16_t r[2], const int16_t a[2], const int16_t b[2], int16_t zeta); - -#define ntt_avx KYBER_NAMESPACE(ntt_avx) -//void ntt_avx(int16_t *r, const uint16_t *qdata); -//void nttl0_avx(int16_t *r, const uint16_t *qdata); -//void nttl1t6_avx(int16_t *r, const uint16_t *qdata); -#define invntt_avx KYBER_NAMESPACE(invntt_avx) -//void invntt_avx(int16_t *r, const uint16_t *qdata); - -#define nttpack_avx KYBER_NAMESPACE(nttpack_avx) -void nttpack_avx(int16_t *r, const uint16_t *qdata); -#define nttunpack_avx KYBER_NAMESPACE(nttunpack_avx) -void nttunpack_avx(int16_t *r, const uint16_t *qdata); - -#define basemul_avx KYBER_NAMESPACE(basemul_avx) -void basemul_avx(int16_t *r, - const int16_t *a, - const int16_t *b, - const uint16_t *qdata); -#define basemul_acc_avx KYBER_NAMESPACE(basemul_acc_avx) -void basemul_acc_avx(int16_t *r, - const int16_t *a, - const int16_t *b, - const uint16_t *qdata); - -#define ntttobytes_avx KYBER_NAMESPACE(ntttobytes_avx) -void ntttobytes_avx(uint8_t *r, const int16_t *a, const uint16_t *qdata); -#define nttfrombytes_avx KYBER_NAMESPACE(nttfrombytes_avx) -void nttfrombytes_avx(int16_t *r, const uint8_t *a, const uint16_t *qdata); - - -void ntt(int16_t *poly); - -#endif diff --git a/code/jasmin/avx2v/params.h b/code/jasmin/avx2v/params.h deleted file mode 100644 index 631fc411..00000000 --- a/code/jasmin/avx2v/params.h +++ /dev/null @@ -1,50 +0,0 @@ -#ifndef PARAMS_H -#define PARAMS_H - -#ifndef KYBER_K -#define KYBER_K 3 /* Change this for different security strengths */ -#endif - -#define KYBER_NAMESPACE(s) s - -/* Don't change parameters below this line */ - -#define KYBER_N 256 -#define KYBER_Q 3329 - - - -#define KYBER_SYMBYTES 32 /* size in bytes of hashes, and seeds */ -#define KYBER_SSBYTES 32 /* size in bytes of shared key */ - -#define KYBER_POLYBYTES 384 -#define KYBER_POLYVECBYTES (KYBER_K * KYBER_POLYBYTES) - - -#if KYBER_K == 2 -#define KYBER_ETA1 3 -#define KYBER_POLYCOMPRESSEDBYTES 128 -#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 320) -#elif KYBER_K == 3 -#define KYBER_ETA1 2 -#define KYBER_POLYCOMPRESSEDBYTES 128 -#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 320) -#elif KYBER_K == 4 -#define KYBER_ETA1 2 -#define KYBER_POLYCOMPRESSEDBYTES 160 -#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 352) -#endif - -#define KYBER_ETA2 2 - -#define KYBER_INDCPA_MSGBYTES KYBER_SYMBYTES -#define KYBER_INDCPA_PUBLICKEYBYTES (KYBER_POLYVECBYTES + KYBER_SYMBYTES) -#define KYBER_INDCPA_SECRETKEYBYTES (KYBER_POLYVECBYTES) -#define KYBER_INDCPA_BYTES (KYBER_POLYVECCOMPRESSEDBYTES + KYBER_POLYCOMPRESSEDBYTES) - -#define KYBER_PUBLICKEYBYTES (KYBER_INDCPA_PUBLICKEYBYTES) -#define KYBER_SECRETKEYBYTES (KYBER_INDCPA_SECRETKEYBYTES + KYBER_INDCPA_PUBLICKEYBYTES + 2*KYBER_SYMBYTES) /* 32 bytes of additional space to save H(pk) */ -#define KYBER_CIPHERTEXTBYTES KYBER_INDCPA_BYTES -#define KYBER_SSBYTES 32 - -#endif diff --git a/code/jasmin/avx2v/params.jinc b/code/jasmin/avx2v/params.jinc deleted file mode 100644 index caf2ec9f..00000000 --- a/code/jasmin/avx2v/params.jinc +++ /dev/null @@ -1,26 +0,0 @@ -param int KYBER_K = 3; - -param int KYBER_Q = 3329; -param int KYBER_N = 256; -param int KYBER_VECN = KYBER_K * KYBER_N; - -param int KYBER_SYMBYTES = 32; -param int KYBER_SSBYTES = 32; - -param int KYBER_ETA1 = 2; -param int KYBER_ETA2 = 2; - -param int KYBER_POLYBYTES = 384; -param int KYBER_POLYVECBYTES = (KYBER_K * KYBER_POLYBYTES); - -param int KYBER_POLYCOMPRESSEDBYTES = 128; -param int KYBER_POLYVECCOMPRESSEDBYTES = (KYBER_K * 320); - -param int KYBER_INDCPA_MSGBYTES = KYBER_SYMBYTES; -param int KYBER_INDCPA_PUBLICKEYBYTES = KYBER_POLYVECBYTES + KYBER_SYMBYTES; -param int KYBER_INDCPA_SECRETKEYBYTES = KYBER_POLYVECBYTES; -param int KYBER_INDCPA_CIPHERTEXTBYTES = KYBER_POLYVECCOMPRESSEDBYTES + KYBER_POLYCOMPRESSEDBYTES; - -param int KYBER_PUBLICKEYBYTES = KYBER_INDCPA_PUBLICKEYBYTES; -param int KYBER_SECRETKEYBYTES = KYBER_INDCPA_SECRETKEYBYTES + KYBER_INDCPA_PUBLICKEYBYTES + 2*KYBER_SYMBYTES; -param int KYBER_CIPHERTEXTBYTES = KYBER_INDCPA_CIPHERTEXTBYTES; diff --git a/code/jasmin/avx2v/poly.c b/code/jasmin/avx2v/poly.c deleted file mode 100644 index 19c64f60..00000000 --- a/code/jasmin/avx2v/poly.c +++ /dev/null @@ -1,378 +0,0 @@ -#include -#include "params.h" -#include "poly.h" -#include "ntt.h" -#include "reduce.h" -#include "cbd.h" -#include "symmetric.h" - -/************************************************* -* Name: poly_compress -* -* Description: Compression and subsequent serialization of a polynomial -* -* Arguments: - unsigned char *r: pointer to output byte array (needs space for KYBER_POLYCOMPRESSEDBYTES bytes) -* - const poly *a: pointer to input polynomial -**************************************************/ -void poly_compress(unsigned char *r, poly *a) -{ - uint8_t t[8]; - int i,j,k=0; - - poly_csubq(a); - -#if (KYBER_POLYCOMPRESSEDBYTES == 96) - for(i=0;icoeffs[i+j] << 3) + KYBER_Q/2) / KYBER_Q) & 7; - - r[k] = t[0] | (t[1] << 3) | (t[2] << 6); - r[k+1] = (t[2] >> 2) | (t[3] << 1) | (t[4] << 4) | (t[5] << 7); - r[k+2] = (t[5] >> 1) | (t[6] << 2) | (t[7] << 5); - k += 3; - } -#elif (KYBER_POLYCOMPRESSEDBYTES == 128) - for(i=0;icoeffs[i+j] << 4) + KYBER_Q/2) / KYBER_Q) & 15; - - r[k] = t[0] | (t[1] << 4); - r[k+1] = t[2] | (t[3] << 4); - r[k+2] = t[4] | (t[5] << 4); - r[k+3] = t[6] | (t[7] << 4); - k += 4; - } -#elif (KYBER_POLYCOMPRESSEDBYTES == 160) - for(i=0;icoeffs[i+j] << 5) + KYBER_Q/2) / KYBER_Q) & 31; - - r[k] = t[0] | (t[1] << 5); - r[k+1] = (t[1] >> 3) | (t[2] << 2) | (t[3] << 7); - r[k+2] = (t[3] >> 1) | (t[4] << 4); - r[k+3] = (t[4] >> 4) | (t[5] << 1) | (t[6] << 6); - r[k+4] = (t[6] >> 2) | (t[7] << 3); - k += 5; - } -#else -#error "KYBER_POLYCOMPRESSEDBYTES needs to be in {96, 128, 160}" -#endif -} - -/************************************************* -* Name: poly_decompress -* -* Description: De-serialization and subsequent decompression of a polynomial; -* approximate inverse of poly_compress -* -* Arguments: - poly *r: pointer to output polynomial -* - const unsigned char *a: pointer to input byte array (of length KYBER_POLYCOMPRESSEDBYTES bytes) -**************************************************/ -void poly_decompress(poly *r, const unsigned char *a) -{ - int i; -#if (KYBER_POLYCOMPRESSEDBYTES == 96) - for(i=0;icoeffs[i+0] = (((a[0] & 7) * KYBER_Q) + 4) >> 3; - r->coeffs[i+1] = ((((a[0] >> 3) & 7) * KYBER_Q) + 4) >> 3; - r->coeffs[i+2] = ((((a[0] >> 6) | ((a[1] << 2) & 4)) * KYBER_Q) + 4) >> 3; - r->coeffs[i+3] = ((((a[1] >> 1) & 7) * KYBER_Q) + 4) >> 3; - r->coeffs[i+4] = ((((a[1] >> 4) & 7) * KYBER_Q) + 4) >> 3; - r->coeffs[i+5] = ((((a[1] >> 7) | ((a[2] << 1) & 6)) * KYBER_Q) + 4) >> 3; - r->coeffs[i+6] = ((((a[2] >> 2) & 7) * KYBER_Q) + 4) >> 3; - r->coeffs[i+7] = ((((a[2] >> 5)) * KYBER_Q) + 4) >> 3; - a += 3; - } -#elif (KYBER_POLYCOMPRESSEDBYTES == 128) - for(i=0;icoeffs[i+0] = (((a[0] & 15) * KYBER_Q) + 8) >> 4; - r->coeffs[i+1] = (((a[0] >> 4) * KYBER_Q) + 8) >> 4; - r->coeffs[i+2] = (((a[1] & 15) * KYBER_Q) + 8) >> 4; - r->coeffs[i+3] = (((a[1] >> 4) * KYBER_Q) + 8) >> 4; - r->coeffs[i+4] = (((a[2] & 15) * KYBER_Q) + 8) >> 4; - r->coeffs[i+5] = (((a[2] >> 4) * KYBER_Q) + 8) >> 4; - r->coeffs[i+6] = (((a[3] & 15) * KYBER_Q) + 8) >> 4; - r->coeffs[i+7] = (((a[3] >> 4) * KYBER_Q) + 8) >> 4; - a += 4; - } -#elif (KYBER_POLYCOMPRESSEDBYTES == 160) - for(i=0;icoeffs[i+0] = (((a[0] & 31) * KYBER_Q) + 16) >> 5; - r->coeffs[i+1] = ((((a[0] >> 5) | ((a[1] & 3) << 3)) * KYBER_Q) + 16) >> 5; - r->coeffs[i+2] = ((((a[1] >> 2) & 31) * KYBER_Q) + 16) >> 5; - r->coeffs[i+3] = ((((a[1] >> 7) | ((a[2] & 15) << 1)) * KYBER_Q) + 16) >> 5; - r->coeffs[i+4] = ((((a[2] >> 4) | ((a[3] & 1) << 4)) * KYBER_Q) + 16) >> 5; - r->coeffs[i+5] = ((((a[3] >> 1) & 31) * KYBER_Q) + 16) >> 5; - r->coeffs[i+6] = ((((a[3] >> 6) | ((a[4] & 7) << 2)) * KYBER_Q) + 16) >> 5; - r->coeffs[i+7] = (((a[4] >> 3) * KYBER_Q) + 16) >> 5; - a += 5; - } -#else -#error "KYBER_POLYCOMPRESSEDBYTES needs to be in {96, 128, 160}" -#endif -} - -/************************************************* -* Name: poly_tobytes -* -* Description: Serialization of a polynomial -* -* Arguments: - unsigned char *r: pointer to output byte array (needs space for KYBER_POLYBYTES bytes) -* - const poly *a: pointer to input polynomial -**************************************************/ -void poly_tobytes(unsigned char *r, poly *a) -{ - int i; - uint16_t t0, t1; - - poly_csubq(a); - - for(i=0;icoeffs[2*i]; - t1 = a->coeffs[2*i+1]; - r[3*i] = t0 & 0xff; - r[3*i+1] = (t0 >> 8) | ((t1 & 0xf) << 4); - r[3*i+2] = t1 >> 4; - } -} - -/************************************************* -* Name: poly_frombytes -* -* Description: De-serialization of a polynomial; -* inverse of poly_tobytes -* -* Arguments: - poly *r: pointer to output polynomial -* - const unsigned char *a: pointer to input byte array (of KYBER_POLYBYTES bytes) -**************************************************/ -void poly_frombytes(poly *r, const unsigned char *a) -{ - int i; - - for(i=0;icoeffs[2*i] = a[3*i] | ((uint16_t)a[3*i+1] & 0x0f) << 8; - r->coeffs[2*i+1] = a[3*i+1] >> 4 | ((uint16_t)a[3*i+2] & 0xff) << 4; - } -} - -/************************************************* -* Name: poly_getnoise_eta1 -* -* Description: Sample a polynomial deterministically from a seed and a nonce, -* with output polynomial close to centered binomial distribution -* with parameter KYBER_ETA1 -* -* Arguments: - poly *r: pointer to output polynomial -* - const unsigned char *seed: pointer to input seed (pointing to array of length KYBER_SYMBYTES bytes) -* - unsigned char nonce: one-byte input nonce -**************************************************/ -void poly_getnoise_eta1(poly *r, const unsigned char *seed, unsigned char nonce) -{ - uint8_t buf[KYBER_ETA1*KYBER_N/4]; - - prf(buf, sizeof(buf), seed, nonce); - poly_cbd_eta1(r, buf); -} - -/************************************************* -* Name: poly_getnoise_eta2 -* -* Description: Sample a polynomial deterministically from a seed and a nonce, -* with output polynomial close to centered binomial distribution -* with parameter KYBER_ETA2 -* -* Arguments: - poly *r: pointer to output polynomial -* - const uint8_t *seed: pointer to input seed -* (of length KYBER_SYMBYTES bytes) -* - uint8_t nonce: one-byte input nonce -**************************************************/ -void poly_getnoise_eta2(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce) -{ - uint8_t buf[KYBER_ETA2*KYBER_N/4]; - prf(buf, sizeof(buf), seed, nonce); - poly_cbd_eta2(r, buf); -} - -/************************************************* -* Name: poly_ntt -* -* Description: Computes negacyclic number-theoretic transform (NTT) of -* a polynomial in place; -* inputs assumed to be in normal order, output in bitreversed order -* -* Arguments: - uint16_t *r: pointer to in/output polynomial -**************************************************/ -void poly_ntt(poly *r) -{ - ntt(r->coeffs); - poly_reduce(r); -} - -/************************************************* -* Name: poly_invntt -* -* Description: Computes inverse of negacyclic number-theoretic transform (NTT) of -* a polynomial in place; -* inputs assumed to be in bitreversed order, output in normal order -* -* Arguments: - uint16_t *a: pointer to in/output polynomial -**************************************************/ -void poly_invntt(poly *r) -{ - invntt(r->coeffs); -} - -/************************************************* -* Name: poly_basemul -* -* Description: Multiplication of two polynomials in NTT domain -* -* Arguments: - poly *r: pointer to output polynomial -* - const poly *a: pointer to first input polynomial -* - const poly *b: pointer to second input polynomial -**************************************************/ -void poly_basemul(poly *r, const poly *a, const poly *b) -{ - unsigned int i; - - for(i = 0; i < KYBER_N/4; ++i) { - basemul(r->coeffs + 4*i, a->coeffs + 4*i, b->coeffs + 4*i, zetas[64 + i]); - basemul(r->coeffs + 4*i + 2, a->coeffs + 4*i + 2, b->coeffs + 4*i + 2, -zetas[64 + i]); - } -} - -/************************************************* -* Name: poly_frommont -* -* Description: Inplace conversion of all coefficients of a polynomial -* from Montgomery domain to normal domain -* -* Arguments: - poly *r: pointer to input/output polynomial -**************************************************/ -void poly_frommont(poly *r) -{ - int i; - const int16_t f = (1ULL << 32) % KYBER_Q; - - for(i=0;icoeffs[i] = montgomery_reduce((int32_t)r->coeffs[i]*f); -} - -/************************************************* -* Name: poly_reduce -* -* Description: Applies Barrett reduction to all coefficients of a polynomial -* for details of the Barrett reduction see comments in reduce.c -* -* Arguments: - poly *r: pointer to input/output polynomial -**************************************************/ -void poly_reduce(poly *r) -{ - int i; - - for(i=0;icoeffs[i] = barrett_reduce(r->coeffs[i]); -} - -/************************************************* -* Name: poly_csubq -* -* Description: Applies conditional subtraction of q to each coefficient of a polynomial -* for details of conditional subtraction of q see comments in reduce.c -* -* Arguments: - poly *r: pointer to input/output polynomial -**************************************************/ -void poly_csubq(poly *r) -{ - int i; - - for(i=0;icoeffs[i] = csubq(r->coeffs[i]); -} - -/************************************************* -* Name: poly_add -* -* Description: Add two polynomials -* -* Arguments: - poly *r: pointer to output polynomial -* - const poly *a: pointer to first input polynomial -* - const poly *b: pointer to second input polynomial -**************************************************/ -void poly_add(poly *r, const poly *a, const poly *b) -{ - int i; - for(i=0;icoeffs[i] = a->coeffs[i] + b->coeffs[i]; -} - -/************************************************* -* Name: poly_sub -* -* Description: Subtract two polynomials -* -* Arguments: - poly *r: pointer to output polynomial -* - const poly *a: pointer to first input polynomial -* - const poly *b: pointer to second input polynomial -**************************************************/ -void poly_sub(poly *r, const poly *a, const poly *b) -{ - int i; - for(i=0;icoeffs[i] = a->coeffs[i] - b->coeffs[i]; -} - -/************************************************* -* Name: poly_frommsg -* -* Description: Convert 32-byte message to polynomial -* -* Arguments: - poly *r: pointer to output polynomial -* - const unsigned char *msg: pointer to input message -**************************************************/ -void poly_frommsg(poly *r, const unsigned char msg[KYBER_SYMBYTES]) -{ - int i,j; - uint16_t mask; - - for(i=0;i> j)&1); - r->coeffs[8*i+j] = mask & ((KYBER_Q+1)/2); - } - } -} - -/************************************************* -* Name: poly_tomsg -* -* Description: Convert polynomial to 32-byte message -* -* Arguments: - unsigned char *msg: pointer to output message -* - const poly *a: pointer to input polynomial -**************************************************/ -void poly_tomsg(unsigned char msg[KYBER_SYMBYTES], poly *a) -{ - uint16_t t; - int i,j; - - poly_csubq(a); - - for(i=0;icoeffs[8*i+j] << 1) + KYBER_Q/2) / KYBER_Q) & 1; - msg[i] |= t << j; - } - } -} diff --git a/code/jasmin/avx2v/poly.h b/code/jasmin/avx2v/poly.h deleted file mode 100644 index d9c2f73d..00000000 --- a/code/jasmin/avx2v/poly.h +++ /dev/null @@ -1,68 +0,0 @@ -#ifndef POLY_H -#define POLY_H - -#include -#include "params.h" - -/* - * Elements of R_q = Z_q[X]/(X^n + 1). Represents polynomial - * coeffs[0] + X*coeffs[1] + X^2*xoeffs[2] + ... + X^{n-1}*coeffs[n-1] - */ -typedef struct{ - int16_t __attribute__((aligned(32))) coeffs[KYBER_N]; -} poly; - -void poly_compress(unsigned char *r, poly *a); -void poly_decompress(poly *r, const unsigned char *a); - -void poly_tobytes(unsigned char *r, poly *a); -void poly_frombytes(poly *r, const unsigned char *a); - -void poly_frommsg(poly *r, const unsigned char msg[KYBER_SYMBYTES]); -void poly_tomsg(unsigned char msg[KYBER_SYMBYTES], poly *r); - -void poly_getnoise_eta1(poly *r,const unsigned char *seed, unsigned char nonce); -void poly_getnoise_eta2(poly *r,const unsigned char *seed, unsigned char nonce); - -void poly_ntt(poly *r); -void poly_invntt(poly *r); -void poly_basemul(poly *r, const poly *a, const poly *b); -void poly_frommont(poly *r); - -void poly_reduce(poly *r); -void poly_csubq(poly *r); - -void poly_add(poly *r, const poly *a, const poly *b); -void poly_sub(poly *r, const poly *a, const poly *b); - - - - - - -void poly_compress_jazz(unsigned char *r, poly *a); -void poly_decompress_jazz(poly *r, const unsigned char *a); - -void poly_tobytes_jazz(unsigned char *r, poly *a); -void poly_frombytes_jazz(poly *r, const unsigned char *a); - -void poly_frommsg_jazz(poly *r, const unsigned char msg[KYBER_SYMBYTES]); -void poly_tomsg_jazz(unsigned char msg[KYBER_SYMBYTES], poly *r); - - -void poly_getnoise_eta1_4x_jazz(poly *r,const unsigned char *seed, unsigned char nonce); -void poly_getnoise_eta1122_4x_jazz(poly *r,const unsigned char *seed, unsigned char nonce); - -void poly_ntt_jazz(poly *r); -void poly_invntt_jazz(poly *r); -void poly_basemul_jazz(poly *r, const poly *a, const poly *b); -void poly_frommont_jazz(poly *r); - -void poly_reduce_jazz(poly *r); -void poly_csubq_jazz(poly *r); - -void poly_add2_jazz(poly *r, const poly *b); -void poly_sub_jazz(poly *r, const poly *a, const poly *b); - - -#endif diff --git a/code/jasmin/avx2v/poly.jinc b/code/jasmin/avx2v/poly.jinc deleted file mode 100644 index 0947512c..00000000 --- a/code/jasmin/avx2v/poly.jinc +++ /dev/null @@ -1,1411 +0,0 @@ -require "params.jinc" -require "shuffle.jinc" -require "consts.jinc" -require "reduce.jinc" -require "fips202.jinc" -require "fips202_4x.jinc" - -fn _poly_add2(reg ptr u16[KYBER_N] rp bp) -> stack u16[KYBER_N] -{ - inline int i; - reg u256 a; - reg u256 b; - reg u256 r; - - for i = 0 to 16 { - a = rp.[u256 32*i]; - b = bp.[u256 32*i]; - r = #VPADD_16u16(a, b); - rp.[u256 32*i] = r; - } - - return rp; -} - -fn _poly_csubq(reg ptr u16[KYBER_N] rp) -> reg ptr u16[KYBER_N] -{ - reg u256 r qx16; - inline int i; - - qx16 = jqx16[u256 0]; - - for i=0 to 16 { - r = rp.[u256 32*i]; - r = __csubq(r, qx16); - rp.[u256 32*i] = r; - } - - return rp; -} - -inline -fn __w256_interleave_u16(reg u256 al ah) -> reg u256, reg u256 { - reg u256 a0 a1; - - a0 = #VPUNPCKL_16u16(al, ah); - a1 = #VPUNPCKH_16u16(al, ah); - - return a0, a1; -} - -inline -fn __w256_deinterleave_u16(reg u256 _zero a0 a1) -> reg u256, reg u256 { - reg u256 al ah; - - al = #VPBLEND_16u16(a0,_zero,0xAA); - ah = #VPBLEND_16u16(a1,_zero,0xAA); - al = #VPACKUS_8u32(al, ah); - a0 = #VPSRL_8u32(a0,16); - a1 = #VPSRL_8u32(a1,16); - ah = #VPACKUS_8u32(a0, a1); - - return al, ah; -} - -inline -fn __mont_red(reg u256 lo hi qx16 qinvx16) -> reg u256 { - reg u256 m; - - m = #VPMULL_16u16(lo, qinvx16); - m = #VPMULH_16u16(m, qx16); - lo = #VPSUB_16u16(hi, m); - - return lo; -} - -inline -fn __wmul_16u16(reg u256 x y) -> reg u256, reg u256 { - reg u256 xyL xyH xy0 xy1; - xyL = #VPMULL_16u16(x, y); - xyH = #VPMULH_16u16(x, y); - xy0, xy1 = __w256_interleave_u16(xyL, xyH); - - return xy0, xy1; -} - -inline -fn __schoolbook16x(reg u256 are aim bre bim zeta zetaqinv qx16 qinvx16, inline int sign) -> reg u256, reg u256 -{ reg u256 zaim ac0 ac1 zbd0 zbd1 ad0 ad1 bc0 bc1 x0 x1 y0 y1 _zero; - - zaim = __fqmulprecomp16x(aim, zetaqinv, zeta, qx16); - - ac0, ac1 = __wmul_16u16(are, bre); - ad0, ad1 = __wmul_16u16(are, bim); - bc0, bc1 = __wmul_16u16(aim, bre); - zbd0, zbd1 = __wmul_16u16(zaim, bim); - - if (sign == 0) { - x0 = #VPADD_8u32(ac0, zbd0); - x1 = #VPADD_8u32(ac1, zbd1); - } else { - x0 = #VPSUB_8u32(ac0, zbd0); - x1 = #VPSUB_8u32(ac1, zbd1); - } - y0 = #VPADD_8u32(bc0, ad0); - y1 = #VPADD_8u32(bc1, ad1); - - _zero = #set0_256(); - x0, x1 = __w256_deinterleave_u16(_zero, x0, x1); - y0, y1 = __w256_deinterleave_u16(_zero, y0, y1); - x0 = __mont_red(x0, x1, qx16, qinvx16); - y0 = __mont_red(y0, y1, qx16, qinvx16); - return x0, y0; -} - -fn _poly_basemul(reg ptr u16[KYBER_N] rp ap bp) -> reg ptr u16[KYBER_N] -{ - reg u256 zeta zetaqinv qx16 qinvx16 are aim bre bim; - - qx16 = jqx16.[u256 0]; - qinvx16 = jqinvx16.[u256 0]; - - zetaqinv = jzetas_exp.[u256 272]; - zeta = jzetas_exp.[u256 304]; - - are = ap.[u256 32*0]; - aim = ap.[u256 32*1]; - bre = bp.[u256 32*0]; - bim = bp.[u256 32*1]; - are, aim = __schoolbook16x(are, aim, bre, bim, zeta, zetaqinv, qx16, qinvx16, 0); - rp.[u256 32*0] = are; - rp.[u256 32*1] = aim; - - are = ap.[u256 32*2]; - aim = ap.[u256 32*3]; - bre = bp.[u256 32*2]; - bim = bp.[u256 32*3]; - are, aim = __schoolbook16x(are, aim, bre, bim, zeta, zetaqinv, qx16, qinvx16, 1); - rp.[u256 32*2] = are; - rp.[u256 32*3] = aim; - - zetaqinv = jzetas_exp.[u256 336]; - zeta = jzetas_exp.[u256 368]; - - are = ap.[u256 32*4]; - aim = ap.[u256 32*5]; - bre = bp.[u256 32*4]; - bim = bp.[u256 32*5]; - are, aim = __schoolbook16x(are, aim, bre, bim, zeta, zetaqinv, qx16, qinvx16, 0); - rp.[u256 32*4] = are; - rp.[u256 32*5] = aim; - - are = ap.[u256 32*6]; - aim = ap.[u256 32*7]; - bre = bp.[u256 32*6]; - bim = bp.[u256 32*7]; - are, aim = __schoolbook16x(are, aim, bre, bim, zeta, zetaqinv, qx16, qinvx16, 1); - rp.[u256 32*6] = are; - rp.[u256 32*7] = aim; - - zetaqinv = jzetas_exp.[u256 664]; - zeta = jzetas_exp.[u256 696]; - - are = ap.[u256 32*8]; - aim = ap.[u256 32*9]; - bre = bp.[u256 32*8]; - bim = bp.[u256 32*9]; - are, aim = __schoolbook16x(are, aim, bre, bim, zeta, zetaqinv, qx16, qinvx16, 0); - rp.[u256 32*8] = are; - rp.[u256 32*9] = aim; - - are = ap.[u256 32*10]; - aim = ap.[u256 32*11]; - bre = bp.[u256 32*10]; - bim = bp.[u256 32*11]; - are, aim = __schoolbook16x(are, aim, bre, bim, zeta, zetaqinv, qx16, qinvx16, 1); - rp.[u256 32*10] = are; - rp.[u256 32*11] = aim; - - zetaqinv = jzetas_exp.[u256 728]; - zeta = jzetas_exp.[u256 760]; - - are = ap.[u256 32*12]; - aim = ap.[u256 32*13]; - bre = bp.[u256 32*12]; - bim = bp.[u256 32*13]; - are, aim = __schoolbook16x(are, aim, bre, bim, zeta, zetaqinv, qx16, qinvx16, 0); - rp.[u256 32*12] = are; - rp.[u256 32*13] = aim; - - are = ap.[u256 32*14]; - aim = ap.[u256 32*15]; - bre = bp.[u256 32*14]; - bim = bp.[u256 32*15]; - are, aim = __schoolbook16x(are, aim, bre, bim, zeta, zetaqinv, qx16, qinvx16, 1); - rp.[u256 32*14] = are; - rp.[u256 32*15] = aim; - - return rp; -} - -u16 pc_shift1_s = 0x200; -u16 pc_mask_s = 0x0F; -u16 pc_shift2_s = 0x1001; -u32[8] pc_permidx_s = {0,4,1,5,2,6,3,7}; - -fn _poly_compress(reg u64 rp, reg ptr u16[KYBER_N] a) -> reg ptr u16[KYBER_N] -{ - inline int i; - reg u256 f0 f1 f2 f3 v shift1 mask shift2 permidx; - reg ptr u16[16] x16p; - - a = _poly_csubq(a); - - x16p = jvx16; - v = x16p[u256 0]; - shift1 = #VPBROADCAST_16u16(pc_shift1_s); - mask = #VPBROADCAST_16u16(pc_mask_s); - shift2 = #VPBROADCAST_16u16(pc_shift2_s); - permidx = pc_permidx_s[u256 0]; - - for i=0 to KYBER_N/64 - { - f0 = a[u256 4*i]; - f1 = a[u256 4*i + 1]; - f2 = a[u256 4*i + 2]; - f3 = a[u256 4*i + 3]; - f0 = #VPMULH_16u16(f0, v); - f1 = #VPMULH_16u16(f1, v); - f2 = #VPMULH_16u16(f2, v); - f3 = #VPMULH_16u16(f3, v); - f0 = #VPMULHRS_16u16(f0, shift1); - f1 = #VPMULHRS_16u16(f1, shift1); - f2 = #VPMULHRS_16u16(f2, shift1); - f3 = #VPMULHRS_16u16(f3, shift1); - f0 = #VPAND_256(f0, mask); - f1 = #VPAND_256(f1, mask); - f2 = #VPAND_256(f2, mask); - f3 = #VPAND_256(f3, mask); - f0 = #VPACKUS_16u16(f0, f1); - f2 = #VPACKUS_16u16(f2, f3); - f0 = #VPMADDUBSW_256(f0, shift2); - f2 = #VPMADDUBSW_256(f2, shift2); - f0 = #VPACKUS_16u16(f0, f2); - f0 = #VPERMD(permidx, f0); - (u256)[rp + 32*i] = f0; - } - - return a; -} - -fn _poly_compress_1(reg ptr u8[KYBER_POLYCOMPRESSEDBYTES] rp, reg ptr u16[KYBER_N] a) -> reg ptr u8[KYBER_POLYCOMPRESSEDBYTES], reg ptr u16[KYBER_N] -{ - inline int i; - reg u256 f0 f1 f2 f3 v shift1 mask shift2 permidx; - reg ptr u16[16] x16p; - - a = _poly_csubq(a); - - x16p = jvx16; - v = x16p[u256 0]; - shift1 = #VPBROADCAST_16u16(pc_shift1_s); - mask = #VPBROADCAST_16u16(pc_mask_s); - shift2 = #VPBROADCAST_16u16(pc_shift2_s); - permidx = pc_permidx_s[u256 0]; - - for i=0 to KYBER_N/64 - { - f0 = a[u256 4*i]; - f1 = a[u256 4*i + 1]; - f2 = a[u256 4*i + 2]; - f3 = a[u256 4*i + 3]; - f0 = #VPMULH_16u16(f0, v); - f1 = #VPMULH_16u16(f1, v); - f2 = #VPMULH_16u16(f2, v); - f3 = #VPMULH_16u16(f3, v); - f0 = #VPMULHRS_16u16(f0, shift1); - f1 = #VPMULHRS_16u16(f1, shift1); - f2 = #VPMULHRS_16u16(f2, shift1); - f3 = #VPMULHRS_16u16(f3, shift1); - f0 = #VPAND_256(f0, mask); - f1 = #VPAND_256(f1, mask); - f2 = #VPAND_256(f2, mask); - f3 = #VPAND_256(f3, mask); - f0 = #VPACKUS_16u16(f0, f1); - f2 = #VPACKUS_16u16(f2, f3); - f0 = #VPMADDUBSW_256(f0, shift2); - f2 = #VPMADDUBSW_256(f2, shift2); - f0 = #VPACKUS_16u16(f0, f2); - f0 = #VPERMD(permidx, f0); - rp.[u256 32*i] = f0; - } - - return rp, a; -} - -u8[32] pd_jshufbidx = {0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3, - 4,4,4,4,5,5,5,5,6,6,6,6,7,7,7,7}; -u32 pd_mask_s = 0x00F0000F; -u32 pd_shift_s = 0x800800; - -fn _poly_decompress(reg ptr u16[KYBER_N] rp, reg u64 ap) -> stack u16[KYBER_N] -{ - inline int i; - reg u256 f q shufbidx mask shift; - reg ptr u16[16] x16p; - reg ptr u8[32] x32p; - - x16p = jqx16; - q = x16p[u256 0]; - x32p = pd_jshufbidx; - shufbidx = x32p[u256 0]; - mask = #VPBROADCAST_8u32(pd_mask_s); - shift = #VPBROADCAST_8u32(pd_shift_s); - - f = #set0_256(); - - for i=0 to KYBER_N/16 - { - f = #VPBROADCAST_2u128((u128)[ap + 8*i]); - f = #VPSHUFB_256(f, shufbidx); - f = #VPAND_256(f, mask); - f = #VPMULL_16u16(f, shift); - f = #VPMULHRS_16u16(f, q); - rp[u256 i] = f; - } - - return rp; -} - - -fn _poly_frombytes(reg ptr u16[KYBER_N] rp, reg u64 ap) -> reg ptr u16[KYBER_N] -{ - inline int i; - reg u256 t0 t1 t2 t3 t4 t5 t6 t7 t8 t9 t10 t11 tt mask; - reg ptr u16[16] maskp; - - maskp = maskx16; - mask = maskp[u256 0]; - - for i=0 to 2 - { - t0 = (u256)[ap + 192*i]; - t1 = (u256)[ap + 192*i + 32]; - t2 = (u256)[ap + 192*i + 64]; - t3 = (u256)[ap + 192*i + 96]; - t4 = (u256)[ap + 192*i + 128]; - t5 = (u256)[ap + 192*i + 160]; - - tt, t3 = __shuffle8(t0, t3); - t0, t4 = __shuffle8(t1, t4); - t1, t5 = __shuffle8(t2, t5); - - t2, t4 = __shuffle4(tt, t4); - tt, t1 = __shuffle4(t3, t1); - t3, t5 = __shuffle4(t0, t5); - - t0, t1 = __shuffle2(t2, t1); - t2, t3 = __shuffle2(t4, t3); - t4, t5 = __shuffle2(tt, t5); - - t6, t3 = __shuffle1(t0, t3); - t0, t4 = __shuffle1(t1, t4); - t1, t5 = __shuffle1(t2, t5); - - t7 = #VPSRL_16u16(t6, 12); - t8 = #VPSLL_16u16(t3, 4); - t7 = #VPOR_256(t7, t8); - t6 = #VPAND_256(mask, t6); - t7 = #VPAND_256(mask, t7); - - t8 = #VPSRL_16u16(t3, 8); - t9 = #VPSLL_16u16(t0, 8); - t8 = #VPOR_256(t8,t9); - t8 = #VPAND_256(mask,t8); - - t9 = #VPSRL_16u16(t0, 4); - t9 = #VPAND_256(mask, t9); - - t10 = #VPSRL_16u16(t4, 12); - t11 = #VPSLL_16u16(t1, 4); - t10 = #VPOR_256(t10, t11); - t4 = #VPAND_256(mask,t4); - t10 = #VPAND_256(mask, t10); - - t11 = #VPSRL_16u16(t1, 8); - tt = #VPSLL_16u16(t5, 8); - t11 = #VPOR_256(t11, tt); - t11 = #VPAND_256(mask, t11); - - tt = #VPSRL_16u16(t5, 4); - tt = #VPAND_256(mask, tt); - - rp[u256 8*i] = t6; - rp[u256 8*i + 1] = t7; - rp[u256 8*i + 2] = t8; - rp[u256 8*i + 3] = t9; - rp[u256 8*i + 4] = t4; - rp[u256 8*i + 5] = t10; - rp[u256 8*i + 6] = t11; - rp[u256 8*i + 7] = tt; - } - - return rp; -} - -param int DMONT = 1353; /* (1ULL << 32) % KYBER_Q */ - -fn _poly_frommont(reg ptr u16[KYBER_N] rp) -> reg ptr u16[KYBER_N] -{ - reg u256 t qx16 qinvx16 dmontx16; - inline int i; - reg ptr u16[16] x16p; - - x16p = jqx16; - qx16 = x16p[u256 0]; - x16p = jqinvx16; - qinvx16 = x16p[u256 0]; - x16p = jdmontx16; - dmontx16 = x16p[u256 0]; - - for i=0 to KYBER_N/16 - { - t = rp[u256 i]; - t = __fqmulx16(t, dmontx16, qx16, qinvx16); - rp[u256 i] = t; - } - - return rp; -} - -u32[4] pfm_shift_s = {3, 2, 1, 0}; -u8[16] pfm_idx_s = {0, 1, 4, 5, 8, 9, 12, 13, - 2, 3, 6, 7, 10, 11, 14, 15}; - -fn _poly_frommsg(reg ptr u16[KYBER_N] rp, reg u64 ap) -> stack u16[KYBER_N] -{ - inline int i; - reg u256 f g0 g1 g2 g3 h0 h1 h2 h3; - reg u256 shift idx hqs; - reg ptr u16[16] x16p; - - x16p = hqx16_p1; - hqs = x16p[u256 0]; - shift = #VPBROADCAST_2u128(pfm_shift_s[u128 0]); - idx = #VPBROADCAST_2u128(pfm_idx_s[u128 0]); - - f = (u256)[ap]; - - for i=0 to 4 - { - g3 = #VPSHUFD_256(f, 0x55*i); - g3 = #VPSLLV_8u32(g3, shift); - g3 = #VPSHUFB_256(g3, idx); - g0 = #VPSLL_16u16(g3,12); - g1 = #VPSLL_16u16(g3,8); - g2 = #VPSLL_16u16(g3,4); - g0 = #VPSRA_16u16(g0,15); - g1 = #VPSRA_16u16(g1,15); - g2 = #VPSRA_16u16(g2,15); - g3 = #VPSRA_16u16(g3,15); - g0 = #VPAND_256(g0,hqs); - g1 = #VPAND_256(g1,hqs); - g2 = #VPAND_256(g2,hqs); - g3 = #VPAND_256(g3,hqs); - h0 = #VPUNPCKL_4u64(g0,g1); - h2 = #VPUNPCKH_4u64(g0,g1); - h1 = #VPUNPCKL_4u64(g2,g3); - h3 = #VPUNPCKH_4u64(g2,g3); - g0 = #VPERM2I128(h0,h1,0x20); - g2 = #VPERM2I128(h0,h1,0x31); - g1 = #VPERM2I128(h2,h3,0x20); - g3 = #VPERM2I128(h2,h3,0x31); - rp[u256 2*i] = g0; - rp[u256 2*i + 1] = g1; - rp[u256 2*i + 8] = g2; - rp[u256 2*i + 8 + 1] = g3; - } - - return rp; -} - - -fn _poly_frommsg_1(reg ptr u16[KYBER_N] rp, reg ptr u8[32] ap) -> stack u16[KYBER_N] -{ - inline int i; - reg u256 f g0 g1 g2 g3 h0 h1 h2 h3; - reg u256 shift idx hqs; - reg ptr u16[16] x16p; - - x16p = hqx16_p1; - hqs = x16p[u256 0]; - shift = #VPBROADCAST_2u128(pfm_shift_s[u128 0]); - idx = #VPBROADCAST_2u128(pfm_idx_s[u128 0]); - - f = ap[u256 0]; - - for i=0 to 4 - { - g3 = #VPSHUFD_256(f, 0x55*i); - g3 = #VPSLLV_8u32(g3, shift); - g3 = #VPSHUFB_256(g3, idx); - g0 = #VPSLL_16u16(g3,12); - g1 = #VPSLL_16u16(g3,8); - g2 = #VPSLL_16u16(g3,4); - g0 = #VPSRA_16u16(g0,15); - g1 = #VPSRA_16u16(g1,15); - g2 = #VPSRA_16u16(g2,15); - g3 = #VPSRA_16u16(g3,15); - g0 = #VPAND_256(g0,hqs); - g1 = #VPAND_256(g1,hqs); - g2 = #VPAND_256(g2,hqs); - g3 = #VPAND_256(g3,hqs); - h0 = #VPUNPCKL_4u64(g0,g1); - h2 = #VPUNPCKH_4u64(g0,g1); - h1 = #VPUNPCKL_4u64(g2,g3); - h3 = #VPUNPCKH_4u64(g2,g3); - g0 = #VPERM2I128(h0,h1,0x20); - g2 = #VPERM2I128(h0,h1,0x31); - g1 = #VPERM2I128(h2,h3,0x20); - g3 = #VPERM2I128(h2,h3,0x31); - rp[u256 2*i] = g0; - rp[u256 2*i + 1] = g1; - rp[u256 2*i + 8] = g2; - rp[u256 2*i + 8 + 1] = g3; - } - - return rp; -} - - -param int NOISE_NBLOCKS = (KYBER_ETA1 * KYBER_N/4 + SHAKE256_RATE - 1)/SHAKE256_RATE; - -u8[32] cbd_jshufbidx = {0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, 10, 11, -1, - 4, 5, 6, -1, 7, 8, 9, -1, 10, 11, 12, -1, 13, 14, 15, -1}; - -inline -fn __cbd3(reg ptr u16[KYBER_N] rp, reg ptr u8[KYBER_ETA1*KYBER_N/4+(KYBER_ETA1 - 2)*8] buf) -> reg ptr u16[KYBER_N]{ - inline int i; - reg u256 f0 f1 f2 f3; - reg u256 mask249 mask6DB mask07 mask70 mask3 shufbidx; - stack u32 mask249_s mask6DB_s mask07_s mask70_s; - stack u16 mask3_s; - - mask249_s = 0x249249; - mask6DB_s = 0x6DB6DB; - mask07_s = 7; - mask70_s = (7 << 16); - mask3_s = 3; - - mask249 = #VPBROADCAST_8u32(mask249_s); - mask6DB = #VPBROADCAST_8u32(mask6DB_s); - mask07 = #VPBROADCAST_8u32(mask07_s); - mask70 = #VPBROADCAST_8u32(mask70_s); - mask3 = #VPBROADCAST_16u16(mask3_s); - shufbidx = cbd_jshufbidx[u256 0]; - - for i=0 to KYBER_N/32 - { - f0 = buf.[u256 24*i]; - f0 = #VPERMQ(f0, 0x94); - f0 = #VPSHUFB_256(f0, shufbidx); - - f1 = #VPSRL_8u32(f0, 1); - f2 = #VPSRL_8u32(f0, 2); - f0 = #VPAND_256(mask249, f0); - f1 = #VPAND_256(mask249, f1); - f2 = #VPAND_256(mask249, f2); - f0 = #VPADD_8u32(f0, f1); - f0 = #VPADD_8u32(f0, f2); - - f1 = #VPSRL_8u32(f0, 3); - f0 = #VPADD_8u32(f0, mask6DB); - f0 = #VPSUB_8u32(f0, f1); - - f1 = #VPSLL_8u32(f0, 10); - f2 = #VPSRL_8u32(f0, 12); - f3 = #VPSRL_8u32(f0, 2); - f0 = #VPAND_256(f0, mask07); - f1 = #VPAND_256(f1, mask70); - f2 = #VPAND_256(f2, mask07); - f3 = #VPAND_256(f3, mask70); - f0 = #VPADD_16u16(f0, f1); - f1 = #VPADD_16u16(f2, f3); - f0 = #VPSUB_16u16(f0, mask3); - f1 = #VPSUB_16u16(f1, mask3); - - f2 = #VPUNPCKL_8u32(f0, f1); - f3 = #VPUNPCKH_8u32(f0, f1); - - f0 = #VPERM2I128(f2, f3, 0x20); - f1 = #VPERM2I128(f2, f3, 0x31); - - rp[u256 2*i] = f0; - rp[u256 2*i + 1] = f1; - } - - return rp; -} - - -inline -fn __cbd2(reg ptr u16[KYBER_N] rp, reg ptr u8[KYBER_ETA2*KYBER_N/4] buf) -> reg ptr u16[KYBER_N] -{ - inline int i; - reg u256 f0 f1 f2 f3; - reg u256 mask55 mask33 mask03 mask0F; - reg u128 t; - stack u32 mask55_s mask33_s mask03_s mask0F_s; - - mask55_s = 0x55555555; - mask33_s = 0x33333333; - mask03_s = 0x03030303; - mask0F_s = 0x0F0F0F0F; - - mask55 = #VPBROADCAST_8u32(mask55_s); - mask33 = #VPBROADCAST_8u32(mask33_s); - mask03 = #VPBROADCAST_8u32(mask03_s); - mask0F = #VPBROADCAST_8u32(mask0F_s); - - for i=0 to KYBER_N/64 - { - f0 = buf[u256 i]; - - f1 = #VPSRL_16u16(f0, 1); - f0 = #VPAND_256(mask55, f0); - f1 = #VPAND_256(mask55, f1); - f0 = #VPADD_32u8(f0, f1); - - f1 = #VPSRL_16u16(f0, 2); - f0 = #VPAND_256(mask33, f0); - f1 = #VPAND_256(mask33, f1); - f0 = #VPADD_32u8(f0, mask33); - f0 = #VPSUB_32u8(f0, f1); - - f1 = #VPSRL_16u16(f0, 4); - f0 = #VPAND_256(mask0F, f0); - f1 = #VPAND_256(mask0F, f1); - f0 = #VPSUB_32u8(f0, mask03); - f1 = #VPSUB_32u8(f1, mask03); - - f2 = #VPUNPCKL_32u8(f0, f1); - f3 = #VPUNPCKH_32u8(f0, f1); - - t = (128u)f2; - f0 = #VPMOVSX_16u8_16u16(t); - t = #VEXTRACTI128(f2, 1); - f1 = #VPMOVSX_16u8_16u16(t); - t = (128u)f3; - f2 = #VPMOVSX_16u8_16u16(t); - t = #VEXTRACTI128(f3, 1); - f3 = #VPMOVSX_16u8_16u16(t); - rp[u256 4*i] = f0; - rp[u256 4*i + 1] = f2; - rp[u256 4*i + 2] = f1; - rp[u256 4*i + 3] = f3; - } - - return rp; -} - -/* buf 32 bytes longer for cbd3 (KYBER_ETA1 == 3) */ -inline -fn __poly_cbd_eta1(reg ptr u16[KYBER_N] rp, reg ptr u8[KYBER_ETA1*KYBER_N/4+(KYBER_ETA1 - 2)*8] buf) -> reg ptr u16[KYBER_N] -{ - if(KYBER_ETA1 == 2) { // resolved at compile-time - rp = __cbd2(rp, buf[0:KYBER_ETA2*KYBER_N/4]); - } else { - rp = __cbd3(rp, buf); - } - - return rp; -} - -inline -fn __poly_cbd_eta2(reg ptr u16[KYBER_N] rp, reg ptr u8[KYBER_ETA2*KYBER_N/4] buf) -> reg ptr u16[KYBER_N] -{ - if(KYBER_ETA2 == 2) { - rp = __cbd2(rp, buf); - } - return rp; -} - -/* -#[returnaddress="stack"] -fn _poly_getnoise(reg ptr u16[KYBER_N] rp, reg ptr u8[KYBER_SYMBYTES] seed, reg u8 nonce) -> reg ptr u16[KYBER_N] -{ - inline int i; - reg u256 f0 f1 f2 f3; - reg u256 mask55 mask33 mask03 mask0F; - reg u128 t; - reg u64 t64; - stack ptr u16[KYBER_N] srp; - stack u8[128] buf; - stack u8[33] extseed; - stack u32 mask55_s mask33_s mask03_s mask0F_s; - - mask55_s = 0x55555555; - mask33_s = 0x33333333; - mask03_s = 0x03030303; - mask0F_s = 0x0F0F0F0F; - - srp = rp; - - for i=0 to KYBER_SYMBYTES/8 - { - t64 = seed[u64 i]; - extseed[u64 i] = t64; - } - extseed[KYBER_SYMBYTES] = nonce; - - buf = _shake256_128_33(buf, extseed); - - mask55 = #VPBROADCAST_8u32(mask55_s); - mask33 = #VPBROADCAST_8u32(mask33_s); - mask03 = #VPBROADCAST_8u32(mask03_s); - mask0F = #VPBROADCAST_8u32(mask0F_s); - - rp = srp; - - for i=0 to KYBER_N/64 - { - f0 = buf[u256 i]; - - f1 = #VPSRL_16u16(f0, 1); - f0 = #VPAND_256(mask55, f0); - f1 = #VPAND_256(mask55, f1); - f0 = #VPADD_32u8(f0, f1); - - f1 = #VPSRL_16u16(f0, 2); - f0 = #VPAND_256(mask33, f0); - f1 = #VPAND_256(mask33, f1); - f0 = #VPADD_32u8(f0, mask33); - f0 = #VPSUB_32u8(f0, f1); - - f1 = #VPSRL_16u16(f0, 4); - f0 = #VPAND_256(mask0F, f0); - f1 = #VPAND_256(mask0F, f1); - f0 = #VPSUB_32u8(f0, mask03); - f1 = #VPSUB_32u8(f1, mask03); - - f2 = #VPUNPCKL_32u8(f0, f1); - f3 = #VPUNPCKH_32u8(f0, f1); - - t = (128u)f2; - f0 = #VPMOVSX_16u8_16u16(t); - t = #VEXTRACTI128(f2, 1); - f1 = #VPMOVSX_16u8_16u16(t); - t = (128u)f3; - f2 = #VPMOVSX_16u8_16u16(t); - t = #VEXTRACTI128(f3, 1); - f3 = #VPMOVSX_16u8_16u16(t); - rp[u256 4*i] = f0; - rp[u256 4*i + 1] = f2; - rp[u256 4*i + 2] = f1; - rp[u256 4*i + 3] = f3; - } - - return rp; -} -*/ - -inline -fn __shake256_squeezenblocks4x(reg ptr u256[25] state, reg ptr u8[NOISE_NBLOCKS * SHAKE256_RATE] buf0 buf1 buf2 buf3) -> reg ptr u256[25], reg ptr u8[NOISE_NBLOCKS*SHAKE256_RATE], reg ptr u8[NOISE_NBLOCKS*SHAKE256_RATE], reg ptr u8[NOISE_NBLOCKS*SHAKE256_RATE], reg ptr u8[NOISE_NBLOCKS*SHAKE256_RATE] -{ - inline int i; - - for i = 0 to NOISE_NBLOCKS - { - state, buf0[i*SHAKE256_RATE:SHAKE256_RATE], buf1[i*SHAKE256_RATE:SHAKE256_RATE], buf2[i*SHAKE256_RATE:SHAKE256_RATE], buf3[i*SHAKE256_RATE:SHAKE256_RATE] = __shake256_squeezeblock4x(state, buf0[i*SHAKE256_RATE:SHAKE256_RATE], buf1[i*SHAKE256_RATE:SHAKE256_RATE], buf2[i*SHAKE256_RATE:SHAKE256_RATE], buf3[i*SHAKE256_RATE:SHAKE256_RATE]); - } - - return state, buf0, buf1, buf2, buf3; -} - -#[returnaddress="stack"] -fn _poly_getnoise_eta1_4x(reg ptr u16[KYBER_N] r0 r1 r2 r3, reg ptr u8[KYBER_SYMBYTES] seed, reg u8 nonce) -> reg ptr u16[KYBER_N], reg ptr u16[KYBER_N], reg ptr u16[KYBER_N], reg ptr u16[KYBER_N] -{ - reg u256 f; - stack u256[25] state; - stack u8[NOISE_NBLOCKS * SHAKE256_RATE] buf0 buf1 buf2 buf3; - - f = seed[u256 0]; - buf0[u256 0] = f; - buf1[u256 0] = f; - buf2[u256 0] = f; - buf3[u256 0] = f; - - buf0.[32] = nonce; - nonce += 1; - buf1.[32] = nonce; - nonce += 1; - buf2.[32] = nonce; - nonce += 1; - buf3.[32] = nonce; - - state = _shake256_absorb4x_33(state, buf0[0:33], buf1[0:33], buf2[0:33], buf3[0:33]); - state, buf0, buf1, buf2, buf3 = __shake256_squeezenblocks4x(state, buf0, buf1, buf2, buf3); - - r0 = __poly_cbd_eta1(r0, buf0[0:KYBER_ETA1*KYBER_N/4+(KYBER_ETA1 - 2)*8]); - r1 = __poly_cbd_eta1(r1, buf1[0:KYBER_ETA1*KYBER_N/4+(KYBER_ETA1 - 2)*8]); - r2 = __poly_cbd_eta1(r2, buf2[0:KYBER_ETA1*KYBER_N/4+(KYBER_ETA1 - 2)*8]); - r3 = __poly_cbd_eta1(r3, buf3[0:KYBER_ETA1*KYBER_N/4+(KYBER_ETA1 - 2)*8]); - - return r0, r1, r2, r3; -} - -#[returnaddress="stack"] -fn _poly_getnoise_eta1122_4x(reg ptr u16[KYBER_N] r0 r1 r2 r3, reg ptr u8[KYBER_SYMBYTES] seed, reg u8 nonce) -> reg ptr u16[KYBER_N], reg ptr u16[KYBER_N], reg ptr u16[KYBER_N], reg ptr u16[KYBER_N] -{ - reg u256 f; - stack u256[25] state; - stack u8[NOISE_NBLOCKS * SHAKE256_RATE] buf0 buf1 buf2 buf3; - - f = seed[u256 0]; - buf0[u256 0] = f; - buf1[u256 0] = f; - buf2[u256 0] = f; - buf3[u256 0] = f; - - buf0.[32] = nonce; - nonce += 1; - buf1.[32] = nonce; - nonce += 1; - buf2.[32] = nonce; - nonce += 1; - buf3.[32] = nonce; - - state = _shake256_absorb4x_33(state, buf0[0:33], buf1[0:33], buf2[0:33], buf3[0:33]); - state, buf0, buf1, buf2, buf3 = __shake256_squeezenblocks4x(state, buf0, buf1, buf2, buf3); - - r0 = __poly_cbd_eta1(r0, buf0[0:KYBER_ETA1*KYBER_N/4+(KYBER_ETA1 - 2)*8]); - r1 = __poly_cbd_eta1(r1, buf1[0:KYBER_ETA1*KYBER_N/4+(KYBER_ETA1 - 2)*8]); - r2 = __poly_cbd_eta2(r2, buf2[0:KYBER_ETA2*KYBER_N/4]); - r3 = __poly_cbd_eta2(r3, buf3[0:KYBER_ETA2*KYBER_N/4]); - - return r0, r1, r2, r3; -} - - -inline -fn __invntt___butterfly64x(reg u256 rl0 rl1 rl2 rl3 rh0 rh1 rh2 rh3 zl0 zl1 zh0 zh1 qx16) - -> reg u256, reg u256, reg u256, reg u256, reg u256, reg u256, reg u256, reg u256 -{ - reg u256 t0 t1 t2 t3; - - t0 = #VPSUB_16u16(rl0, rh0); - t1 = #VPSUB_16u16(rl1, rh1); - t2 = #VPSUB_16u16(rl2, rh2); - - rl0 = #VPADD_16u16(rh0, rl0); - rl1 = #VPADD_16u16(rh1, rl1); - rh0 = #VPMULL_16u16(zl0, t0); - - rl2 = #VPADD_16u16(rh2, rl2); - rh1 = #VPMULL_16u16(zl0, t1); - t3 = #VPSUB_16u16(rl3, rh3); - - rl3 = #VPADD_16u16(rh3, rl3); - rh2 = #VPMULL_16u16(zl1, t2); - rh3 = #VPMULL_16u16(zl1, t3); - - t0 = #VPMULH_16u16(zh0, t0); - t1 = #VPMULH_16u16(zh0, t1); - - t2 = #VPMULH_16u16(zh1, t2); - t3 = #VPMULH_16u16(zh1, t3); - - // Reduce - rh0 = #VPMULH_16u16(qx16, rh0); - rh1 = #VPMULH_16u16(qx16, rh1); - rh2 = #VPMULH_16u16(qx16, rh2); - rh3 = #VPMULH_16u16(qx16, rh3); - - rh0 = #VPSUB_16u16(t0, rh0); - rh1 = #VPSUB_16u16(t1, rh1); - rh2 = #VPSUB_16u16(t2, rh2); - rh3 = #VPSUB_16u16(t3, rh3); - - return rl0, rl1, rl2, rl3, rh0, rh1, rh2, rh3; -} - -fn _poly_invntt(reg ptr u16[KYBER_N] rp) -> reg ptr u16[KYBER_N] -{ - reg u256 zeta0 zeta1 zeta2 zeta3 r0 r1 r2 r3 r4 r5 r6 r7 qx16 vx16 flox16 fhix16; - reg ptr u16[400] zetasp; - reg ptr u16[16] qx16p; - inline int i; - - zetasp = jzetas_inv_exp; - qx16 = jqx16[u256 0]; - - for i=0 to 2 - { - // level 0: - zeta0 = zetasp.[u256 0+392*i]; - zeta1 = zetasp.[u256 64+392*i]; - zeta2 = zetasp.[u256 32+392*i]; - zeta3 = zetasp.[u256 96+392*i]; - - r0 = rp.[u256 32*0+256*i]; - r1 = rp.[u256 32*1+256*i]; - r2 = rp.[u256 32*2+256*i]; - r3 = rp.[u256 32*3+256*i]; - r4 = rp.[u256 32*4+256*i]; - r5 = rp.[u256 32*5+256*i]; - r6 = rp.[u256 32*6+256*i]; - r7 = rp.[u256 32*7+256*i]; - - r0, r1, r4, r5, r2, r3, r6, r7 = __invntt___butterfly64x(r0, r1, r4, r5, r2, r3, r6, r7, zeta0, zeta1, zeta2, zeta3, qx16); - - // level 1: - vx16 = jvx16[u256 0]; - zeta0 = zetasp.[u256 128+392*i]; - zeta1 = zetasp.[u256 160+392*i]; - r0 = __red16x(r0, qx16, vx16); - r1 = __red16x(r1, qx16, vx16); - r4 = __red16x(r4, qx16, vx16); - r5 = __red16x(r5, qx16, vx16); - - r0, r1, r2, r3, r4, r5, r6, r7 = __invntt___butterfly64x(r0, r1, r2, r3, r4, r5, r6, r7, zeta0, zeta0, zeta1, zeta1, qx16); - - r0, r1 = __shuffle1(r0, r1); - r2, r3 = __shuffle1(r2, r3); - r4, r5 = __shuffle1(r4, r5); - r6, r7 = __shuffle1(r6, r7); - - // level 2: - zeta0 = zetasp.[u256 192+392*i]; - zeta1 = zetasp.[u256 224+392*i]; - - - r0, r2, r4, r6, r1, r3, r5, r7 = __invntt___butterfly64x(r0, r2, r4, r6, r1, r3, r5, r7, zeta0, zeta0, zeta1, zeta1, qx16); - - r0 = __red16x(r0, qx16, vx16); - - r0, r2 = __shuffle2(r0, r2); - r4, r6 = __shuffle2(r4, r6); - r1, r3 = __shuffle2(r1, r3); - r5, r7 = __shuffle2(r5, r7); - - // level 3: - zeta0 = zetasp.[u256 256+392*i]; - zeta1 = zetasp.[u256 288+392*i]; - - r0, r4, r1, r5, r2, r6, r3, r7 = __invntt___butterfly64x(r0, r4, r1, r5, r2, r6, r3, r7, zeta0, zeta0, zeta1, zeta1, qx16); - - r0 = __red16x(r0, qx16, vx16); - - r0, r4 = __shuffle4(r0, r4); - r1, r5 = __shuffle4(r1, r5); - r2, r6 = __shuffle4(r2, r6); - r3, r7 = __shuffle4(r3, r7); - - // level 4: - zeta0 = zetasp.[u256 320+392*i]; - zeta1 = zetasp.[u256 352+392*i]; - - r0, r1, r2, r3, r4, r5, r6, r7 = __invntt___butterfly64x(r0, r1, r2, r3, r4, r5, r6, r7, zeta0, zeta0, zeta1, zeta1, qx16); - - r0 = __red16x(r0, qx16, vx16); - - r0, r1 = __shuffle8(r0, r1); - r2, r3 = __shuffle8(r2, r3); - r4, r5 = __shuffle8(r4, r5); - r6, r7 = __shuffle8(r6, r7); - - // level 5: - zeta0 = #VPBROADCAST_8u32(zetasp.[u32 384+392*i]); - zeta1 = #VPBROADCAST_8u32(zetasp.[u32 388+392*i]); - - r0, r2, r4, r6, r1, r3, r5, r7 = __invntt___butterfly64x(r0, r2, r4, r6, r1, r3, r5, r7, zeta0, zeta0, zeta1, zeta1, qx16); - - r0 = __red16x(r0, qx16, vx16); - - if (i==0) { - rp.[u256 32*0+256*i] = r0; - rp.[u256 32*1+256*i] = r2; - rp.[u256 32*2+256*i] = r4; - rp.[u256 32*3+256*i] = r6; - } - rp.[u256 32*4+256*i] = r1; - rp.[u256 32*5+256*i] = r3; - rp.[u256 32*6+256*i] = r5; - rp.[u256 32*7+256*i] = r7; - } - - zeta0 = #VPBROADCAST_8u32(zetasp.[u32 784]); - zeta1 = #VPBROADCAST_8u32(zetasp.[u32 788]); - - for i=0 to 2 - { - if (i == 0) { - r7 = r6; - r6 = r4; - r5 = r2; - r4 = r0; - } else { - r4 = rp.[u256 32*8+128*i]; - r5 = rp.[u256 32*9+128*i]; - r6 = rp.[u256 32*10+128*i]; - r7 = rp.[u256 32*11+128*i]; - } - r0 = rp.[u256 32*0+128*i]; - r1 = rp.[u256 32*1+128*i]; - r2 = rp.[u256 32*2+128*i]; - r3 = rp.[u256 32*3+128*i]; - - r0, r1, r2, r3, r4, r5, r6, r7 = __invntt___butterfly64x(r0, r1, r2, r3, r4, r5, r6, r7, zeta0, zeta0, zeta1, zeta1, qx16); - - flox16 = jflox16[u256 0]; - fhix16 = jfhix16[u256 0]; - - rp.[u256 32*8+128*i] = r4; - rp.[u256 32*9+128*i] = r5; - rp.[u256 32*10+128*i] = r6; - rp.[u256 32*11+128*i] = r7; - - r0 = __fqmulprecomp16x(r0, flox16, fhix16, qx16); - r1 = __fqmulprecomp16x(r1, flox16, fhix16, qx16); - r2 = __fqmulprecomp16x(r2, flox16, fhix16, qx16); - r3 = __fqmulprecomp16x(r3, flox16, fhix16, qx16); - - rp.[u256 32*0+128*i] = r0; - rp.[u256 32*1+128*i] = r1; - rp.[u256 32*2+128*i] = r2; - rp.[u256 32*3+128*i] = r3; - } - - return rp; -} - -inline -fn __butterfly64x(reg u256 rl0 rl1 rl2 rl3 rh0 rh1 rh2 rh3 zl0 zl1 zh0 zh1 qx16) - -> reg u256, reg u256, reg u256, reg u256, reg u256, reg u256, reg u256, reg u256 -{ - reg u256 t0 t1 t2 t3 t4 t5 t6 t7; - - t0 = #VPMULL_16u16(zl0, rh0); - t1 = #VPMULH_16u16(zh0, rh0); - t2 = #VPMULL_16u16(zl0, rh1); - t3 = #VPMULH_16u16(zh0, rh1); - t4 = #VPMULL_16u16(zl1, rh2); - t5 = #VPMULH_16u16(zh1, rh2); - t6 = #VPMULL_16u16(zl1, rh3); - t7 = #VPMULH_16u16(zh1, rh3); - - t0 = #VPMULH_16u16(t0, qx16); - t2 = #VPMULH_16u16(t2, qx16); - t4 = #VPMULH_16u16(t4, qx16); - t6 = #VPMULH_16u16(t6, qx16); - - //rh1 = #VPSUB_16u16(t3, rl1); - rh1 = #VPSUB_16u16(rl1, t3); - rl1 = #VPADD_16u16(t3, rl1); - //rh0 = #VPSUB_16u16(t1, rl0); - rh0 = #VPSUB_16u16(rl0, t1); - rl0 = #VPADD_16u16(t1, rl0); - //rh3 = #VPSUB_16u16(t7, rl3); - rh3 = #VPSUB_16u16(rl3, t7); - rl3 = #VPADD_16u16(t7, rl3); - //rh2 = #VPSUB_16u16(t5, rl2); - rh2 = #VPSUB_16u16(rl2, t5); - rl2 = #VPADD_16u16(t5, rl2); - - rh0 = #VPADD_16u16(t0, rh0); - //rl0 = #VPSUB_16u16(t0, rl0); - rl0 = #VPSUB_16u16(rl0, t0); - rh1 = #VPADD_16u16(t2, rh1); - //rl1 = #VPSUB_16u16(t2, rl1); - rl1 = #VPSUB_16u16(rl1, t2); - rh2 = #VPADD_16u16(t4, rh2); - //rl2 = #VPSUB_16u16(t4, rl2); - rl2 = #VPSUB_16u16(rl2, t4); - rh3 = #VPADD_16u16(t6, rh3); - //rl3 = #VPSUB_16u16(t6, rl3); - rl3 = #VPSUB_16u16(rl3, t6); - - return rl0, rl1, rl2, rl3, rh0, rh1, rh2, rh3; -} - -fn _poly_ntt(reg ptr u16[KYBER_N] rp) -> reg ptr u16[KYBER_N] -{ - reg u256 zeta0 zeta1 zeta2 zeta3 r0 r1 r2 r3 r4 r5 r6 r7 qx16 vx16; - reg u32 t; - reg u16 w; - reg ptr u16[400] zetasp; - inline int i; - - zetasp = jzetas_exp; - qx16 = jqx16[u256 0]; - - zeta0 = #VPBROADCAST_8u32(zetasp[u32 0]); - zeta1 = #VPBROADCAST_8u32(zetasp[u32 1]); - - r0 = rp.[u256 32*0]; - r1 = rp.[u256 32*1]; - r2 = rp.[u256 32*2]; - r3 = rp.[u256 32*3]; - r4 = rp.[u256 32*8]; - r5 = rp.[u256 32*9]; - r6 = rp.[u256 32*10]; - r7 = rp.[u256 32*11]; - - r0, r1, r2, r3, r4, r5, r6, r7 = __butterfly64x(r0, r1, r2, r3, r4, r5, r6, r7, zeta0, zeta0, zeta1, zeta1, qx16); - - rp.[u256 32*0] = r0; - rp.[u256 32*1] = r1; - rp.[u256 32*2] = r2; - rp.[u256 32*3] = r3; - rp.[u256 32*8] = r4; - rp.[u256 32*9] = r5; - rp.[u256 32*10] = r6; - rp.[u256 32*11] = r7; - - r0 = rp.[u256 32*4]; - r1 = rp.[u256 32*5]; - r2 = rp.[u256 32*6]; - r3 = rp.[u256 32*7]; - r4 = rp.[u256 32*12]; - r5 = rp.[u256 32*13]; - r6 = rp.[u256 32*14]; - r7 = rp.[u256 32*15]; - - r0, r1, r2, r3, r4, r5, r6, r7 = __butterfly64x(r0, r1, r2, r3, r4, r5, r6, r7, zeta0, zeta0, zeta1, zeta1, qx16); - - /* - rp.[u256 32*4] = r0; - rp.[u256 32*5] = r1; - rp.[u256 32*6] = r2; - rp.[u256 32*7] = r3; - */ - rp.[u256 32*12] = r4; - rp.[u256 32*13] = r5; - rp.[u256 32*14] = r6; - rp.[u256 32*15] = r7; - - for i=0 to 2 { - - // level 1 - zeta0 = #VPBROADCAST_8u32(zetasp.[u32 8 + 392*i]); - zeta1 = #VPBROADCAST_8u32(zetasp.[u32 12 + 392*i]); - - if ( i == 0) { - r4 = r0; - r5 = r1; - r6 = r2; - r7 = r3; - } else { - r4 = rp.[u256 32*4+256*i]; - r5 = rp.[u256 32*5+256*i]; - r6 = rp.[u256 32*6+256*i]; - r7 = rp.[u256 32*7+256*i]; - } - r0 = rp.[u256 32*0+256*i]; - r1 = rp.[u256 32*1+256*i]; - r2 = rp.[u256 32*2+256*i]; - r3 = rp.[u256 32*3+256*i]; - - r0, r1, r2, r3, r4, r5, r6, r7 = __butterfly64x(r0, r1, r2, r3, r4, r5, r6, r7, zeta0, zeta0, zeta1, zeta1, qx16); - - // level 2 - zeta0 = zetasp.[u256 16 + 392*i]; - zeta1 = zetasp.[u256 48 + 392*i]; - - r0, r4 = __shuffle8(r0, r4); - r1, r5 = __shuffle8(r1, r5); - r2, r6 = __shuffle8(r2, r6); - r3, r7 = __shuffle8(r3, r7); - - r0, r4, r1, r5, r2, r6, r3, r7 = __butterfly64x(r0, r4, r1, r5, r2, r6, r3, r7, zeta0, zeta0, zeta1, zeta1, qx16); - - // level 3 - zeta0 = zetasp.[u256 80 + 392*i]; - zeta1 = zetasp.[u256 112 + 392*i]; - - r0, r2 = __shuffle4(r0, r2); - r4, r6 = __shuffle4(r4, r6); - r1, r3 = __shuffle4(r1, r3); - r5, r7 = __shuffle4(r5, r7); - - r0, r2, r4, r6, r1, r3, r5, r7 = __butterfly64x(r0, r2, r4, r6, r1, r3, r5, r7, zeta0, zeta0, zeta1, zeta1, qx16); - - // level 4 - zeta0 = zetasp.[u256 144 + 392*i]; - zeta1 = zetasp.[u256 176 + 392*i]; - - r0, r1 = __shuffle2(r0, r1); - r2, r3 = __shuffle2(r2, r3); - r4, r5 = __shuffle2(r4, r5); - r6, r7 = __shuffle2(r6, r7); - - r0, r1, r2, r3, r4, r5, r6, r7 = __butterfly64x(r0, r1, r2, r3, r4, r5, r6, r7, zeta0, zeta0, zeta1, zeta1, qx16); - - // level 5 - zeta0 = zetasp.[u256 208 + 392*i]; - zeta1 = zetasp.[u256 240 + 392*i]; - - r0, r4 = __shuffle1(r0, r4); - r1, r5 = __shuffle1(r1, r5); - r2, r6 = __shuffle1(r2, r6); - r3, r7 = __shuffle1(r3, r7); - - r0, r4, r1, r5, r2, r6, r3, r7 = __butterfly64x(r0, r4, r1, r5, r2, r6, r3, r7, zeta0, zeta0, zeta1, zeta1, qx16); - - // level 6 - zeta0 = zetasp.[u256 272 + 392*i]; - zeta2 = zetasp.[u256 304 + 392*i]; - zeta1 = zetasp.[u256 336 + 392*i]; - zeta3 = zetasp.[u256 368 + 392*i]; - - r0, r4, r2, r6, r1, r5, r3, r7 = __butterfly64x(r0, r4, r2, r6, r1, r5, r3, r7, zeta0, zeta1, zeta2, zeta3, qx16); - - vx16 = jvx16[u256 0]; - - r0 = __red16x(r0, qx16, vx16); - r4 = __red16x(r4, qx16, vx16); - r2 = __red16x(r2, qx16, vx16); - r6 = __red16x(r6, qx16, vx16); - r1 = __red16x(r1, qx16, vx16); - r5 = __red16x(r5, qx16, vx16); - r3 = __red16x(r3, qx16, vx16); - r7 = __red16x(r7, qx16, vx16); - - rp.[u256 32*0+256*i] = r0; - rp.[u256 32*1+256*i] = r4; - rp.[u256 32*2+256*i] = r1; - rp.[u256 32*3+256*i] = r5; - rp.[u256 32*4+256*i] = r2; - rp.[u256 32*5+256*i] = r6; - rp.[u256 32*6+256*i] = r3; - rp.[u256 32*7+256*i] = r7; - } - - return rp; -} - -inline -fn __poly_reduce(reg ptr u16[KYBER_N] rp) -> reg ptr u16[KYBER_N] -{ - inline int i; - reg u256 r qx16 vx16; - - qx16 = jqx16[u256 0]; - vx16 = jvx16[u256 0]; - - for i=0 to 16 - { - r = rp.[u256 32*i]; - r = __red16x(r, qx16, vx16); - rp.[u256 32*i] = r; - } - return rp; -} - -fn _poly_sub(reg ptr u16[KYBER_N] rp ap bp) -> reg ptr u16[KYBER_N] -{ - inline int i; - reg u256 a; - reg u256 b; - reg u256 r; - - for i = 0 to 16 { - a = ap.[u256 32*i]; - b = bp.[u256 32*i]; - r = #VPSUB_16u16(a, b); - rp.[u256 32*i] = r; - } - - return rp; -} - -fn _poly_tobytes(reg u64 rp, reg ptr u16[KYBER_N] a) -> reg ptr u16[KYBER_N] -{ - inline int i; - reg u256 t0 t1 t2 t3 t4 t5 t6 t7 qx16 tt ttt; - reg ptr u16[16] jqx16_p; - - jqx16_p = jqx16; - qx16 = jqx16_p[u256 0]; - - a = _poly_csubq(a); - - for i = 0 to 2 - { - t0 = a[u256 8*i]; - t1 = a[u256 8*i + 1]; - t2 = a[u256 8*i + 2]; - t3 = a[u256 8*i + 3]; - t4 = a[u256 8*i + 4]; - t5 = a[u256 8*i + 5]; - t6 = a[u256 8*i + 6]; - t7 = a[u256 8*i + 7]; - - tt = #VPSLL_16u16(t1, 12); - tt |= t0; - - t0 = #VPSRL_16u16(t1, 4); - t1 = #VPSLL_16u16(t2, 8); - t0 |= t1; - - t1 = #VPSRL_16u16(t2, 8); - t2 = #VPSLL_16u16(t3, 4); - t1 |= t2; - - t2 = #VPSLL_16u16(t5, 12); - t2 |= t4; - - t3 = #VPSRL_16u16(t5, 4); - t4 = #VPSLL_16u16(t6, 8); - t3 |= t4; - - t4 = #VPSRL_16u16(t6, 8); - t5 = #VPSLL_16u16(t7, 4); - t4 |= t5; - - ttt, t0 = __shuffle1(tt, t0); - tt, t2 = __shuffle1(t1, t2); - t1, t4 = __shuffle1(t3, t4); - - t3, tt= __shuffle2(ttt, tt); - ttt, t0 = __shuffle2(t1, t0); - t1, t4 = __shuffle2(t2, t4); - - t2, ttt = __shuffle4(t3, ttt); - t3, tt = __shuffle4(t1, tt); - t1, t4 = __shuffle4(t0, t4); - - t0, t3 = __shuffle8(t2, t3); - t2, ttt = __shuffle8(t1, ttt); - t1, t4 = __shuffle8(tt, t4); - - (u256)[rp + 192*i] = t0; - (u256)[rp + 192*i + 32] = t2; - (u256)[rp + 192*i + 64] = t1; - (u256)[rp + 192*i + 96] = t3; - (u256)[rp + 192*i + 128] = ttt; - (u256)[rp + 192*i + 160] = t4; - } - - return a; -} - -fn _poly_tomsg(reg u64 rp, reg ptr u16[KYBER_N] a) -> reg ptr u16[KYBER_N] -{ - inline int i; - reg u256 f0 f1 g0 g1 hq hhq; - reg ptr u16[16] px16; - reg u32 c; - - a = _poly_csubq(a); - - px16 = hqx16_m1; - hq = px16[u256 0]; - - px16 = hhqx16; - hhq = px16[u256 0]; - - for i=0 to KYBER_N/32 - { - f0 = a[u256 2*i]; - f1 = a[u256 2*i + 1]; - f0 = #VPSUB_16u16(hq, f0); - f1 = #VPSUB_16u16(hq, f1); - g0 = #VPSRA_16u16(f0, 15); - g1 = #VPSRA_16u16(f1, 15); - f0 = #VPXOR_256(f0, g0); - f1 = #VPXOR_256(f1, g1); - f0 = #VPSUB_16u16(f0, hhq); - f1 = #VPSUB_16u16(f1, hhq); - f0 = #VPACKSS_16u16(f0, f1); - f0 = #VPERMQ(f0, 0xD8); - c = #VPMOVMSKB_u256u32(f0); - (u32)[rp+4*i] = c; - } - return a; -} - -fn _poly_tomsg_1(reg ptr u8[KYBER_INDCPA_MSGBYTES] rp, reg ptr u16[KYBER_N] a) -> reg ptr u8[KYBER_INDCPA_MSGBYTES], reg ptr u16[KYBER_N] -{ - inline int i; - reg u256 f0 f1 g0 g1 hq hhq; - reg ptr u16[16] px16; - reg u32 c; - - a = _poly_csubq(a); - - px16 = hqx16_m1; - hq = px16[u256 0]; - - px16 = hhqx16; - hhq = px16[u256 0]; - - for i=0 to KYBER_N/32 - { - f0 = a[u256 2*i]; - f1 = a[u256 2*i + 1]; - f0 = #VPSUB_16u16(hq, f0); - f1 = #VPSUB_16u16(hq, f1); - g0 = #VPSRA_16u16(f0, 15); - g1 = #VPSRA_16u16(f1, 15); - f0 = #VPXOR_256(f0, g0); - f1 = #VPXOR_256(f1, g1); - f0 = #VPSUB_16u16(f0, hhq); - f1 = #VPSUB_16u16(f1, hhq); - f0 = #VPACKSS_16u16(f0, f1); - f0 = #VPERMQ(f0, 0xD8); - c = #VPMOVMSKB_u256u32(f0); - rp[u32 i] = c; - } - return rp, a; -} diff --git a/code/jasmin/avx2v/poly_ntt.c b/code/jasmin/avx2v/poly_ntt.c deleted file mode 100644 index 83341812..00000000 --- a/code/jasmin/avx2v/poly_ntt.c +++ /dev/null @@ -1,10 +0,0 @@ -#include "poly.h" -#include "ntt.h" - -void poly_ntt_jazz(poly *r, int16_t *zetas) -{ - ntt(r->coeffs); - poly_reduce(r); -} - - diff --git a/code/jasmin/avx2v/polyvec.c b/code/jasmin/avx2v/polyvec.c deleted file mode 100644 index 316543a0..00000000 --- a/code/jasmin/avx2v/polyvec.c +++ /dev/null @@ -1,237 +0,0 @@ -#include -#include "polyvec.h" -#include "poly.h" - -/************************************************* -* Name: polyvec_compress -* -* Description: Compress and serialize vector of polynomials -* -* Arguments: - unsigned char *r: pointer to output byte array (needs space for KYBER_POLYVECCOMPRESSEDBYTES) -* - const polyvec *a: pointer to input vector of polynomials -**************************************************/ -void polyvec_compress(unsigned char *r, polyvec *a) -{ - int i,j,k; - - polyvec_csubq(a); - -#if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352)) - uint16_t t[8]; - for(i=0;ivec[i].coeffs[8*j+k] << 11) + KYBER_Q/2) / KYBER_Q) & 0x7ff; - - r[11*j+ 0] = t[0] & 0xff; - r[11*j+ 1] = (t[0] >> 8) | ((t[1] & 0x1f) << 3); - r[11*j+ 2] = (t[1] >> 5) | ((t[2] & 0x03) << 6); - r[11*j+ 3] = (t[2] >> 2) & 0xff; - r[11*j+ 4] = (t[2] >> 10) | ((t[3] & 0x7f) << 1); - r[11*j+ 5] = (t[3] >> 7) | ((t[4] & 0x0f) << 4); - r[11*j+ 6] = (t[4] >> 4) | ((t[5] & 0x01) << 7); - r[11*j+ 7] = (t[5] >> 1) & 0xff; - r[11*j+ 8] = (t[5] >> 9) | ((t[6] & 0x3f) << 2); - r[11*j+ 9] = (t[6] >> 6) | ((t[7] & 0x07) << 5); - r[11*j+10] = (t[7] >> 3); - } - r += 352; - } -#elif (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 320)) - uint16_t t[4]; - for(i=0;ivec[i].coeffs[4*j+k] << 10) + KYBER_Q/2) / KYBER_Q) & 0x3ff; - - r[5*j+ 0] = t[0] & 0xff; - r[5*j+ 1] = (t[0] >> 8) | ((t[1] & 0x3f) << 2); - r[5*j+ 2] = (t[1] >> 6) | ((t[2] & 0x0f) << 4); - r[5*j+ 3] = (t[2] >> 4) | ((t[3] & 0x03) << 6); - r[5*j+ 4] = (t[3] >> 2); - } - r += 320; - } -#else -#error "KYBER_POLYVECCOMPRESSEDBYTES needs to be in {320*KYBER_K, 352*KYBER_K}" -#endif -} - -/************************************************* -* Name: polyvec_decompress -* -* Description: De-serialize and decompress vector of polynomials; -* approximate inverse of polyvec_compress -* -* Arguments: - polyvec *r: pointer to output vector of polynomials -* - unsigned char *a: pointer to input byte array (of length KYBER_POLYVECCOMPRESSEDBYTES) -**************************************************/ -void polyvec_decompress(polyvec *r, const unsigned char *a) -{ - int i,j; -#if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352)) - for(i=0;ivec[i].coeffs[8*j+0] = (((a[11*j+ 0] | (((uint32_t)a[11*j+ 1] & 0x07) << 8)) * KYBER_Q) + 1024) >> 11; - r->vec[i].coeffs[8*j+1] = ((((a[11*j+ 1] >> 3) | (((uint32_t)a[11*j+ 2] & 0x3f) << 5)) * KYBER_Q) + 1024) >> 11; - r->vec[i].coeffs[8*j+2] = ((((a[11*j+ 2] >> 6) | (((uint32_t)a[11*j+ 3] & 0xff) << 2) | (((uint32_t)a[11*j+ 4] & 0x01) << 10)) * KYBER_Q) + 1024) >> 11; - r->vec[i].coeffs[8*j+3] = ((((a[11*j+ 4] >> 1) | (((uint32_t)a[11*j+ 5] & 0x0f) << 7)) * KYBER_Q) + 1024) >> 11; - r->vec[i].coeffs[8*j+4] = ((((a[11*j+ 5] >> 4) | (((uint32_t)a[11*j+ 6] & 0x7f) << 4)) * KYBER_Q) + 1024) >> 11; - r->vec[i].coeffs[8*j+5] = ((((a[11*j+ 6] >> 7) | (((uint32_t)a[11*j+ 7] & 0xff) << 1) | (((uint32_t)a[11*j+ 8] & 0x03) << 9)) * KYBER_Q) + 1024) >> 11; - r->vec[i].coeffs[8*j+6] = ((((a[11*j+ 8] >> 2) | (((uint32_t)a[11*j+ 9] & 0x1f) << 6)) * KYBER_Q) + 1024) >> 11; - r->vec[i].coeffs[8*j+7] = ((((a[11*j+ 9] >> 5) | (((uint32_t)a[11*j+10] & 0xff) << 3)) * KYBER_Q) + 1024) >> 11; - } - a += 352; - } -#elif (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 320)) - for(i=0;ivec[i].coeffs[4*j+0] = (((a[5*j+ 0] | (((uint32_t)a[5*j+ 1] & 0x03) << 8)) * KYBER_Q) + 512) >> 10; - r->vec[i].coeffs[4*j+1] = ((((a[5*j+ 1] >> 2) | (((uint32_t)a[5*j+ 2] & 0x0f) << 6)) * KYBER_Q) + 512) >> 10; - r->vec[i].coeffs[4*j+2] = ((((a[5*j+ 2] >> 4) | (((uint32_t)a[5*j+ 3] & 0x3f) << 4)) * KYBER_Q) + 512) >> 10; - r->vec[i].coeffs[4*j+3] = ((((a[5*j+ 3] >> 6) | (((uint32_t)a[5*j+ 4] & 0xff) << 2)) * KYBER_Q) + 512) >> 10; - } - a += 320; - } -#else -#error "KYBER_POLYVECCOMPRESSEDBYTES needs to be in {320*KYBER_K, 352*KYBER_K}" -#endif -} - -/************************************************* -* Name: polyvec_tobytes -* -* Description: Serialize vector of polynomials -* -* Arguments: - unsigned char *r: pointer to output byte array (needs space for KYBER_POLYVECBYTES) -* - const polyvec *a: pointer to input vector of polynomials -**************************************************/ -void polyvec_tobytes(unsigned char *r, polyvec *a) -{ - int i; - for(i=0;ivec[i]); -} - -/************************************************* -* Name: polyvec_frombytes -* -* Description: De-serialize vector of polynomials; -* inverse of polyvec_tobytes -* -* Arguments: - unsigned char *r: pointer to output byte array -* - const polyvec *a: pointer to input vector of polynomials (of length KYBER_POLYVECBYTES) -**************************************************/ -void polyvec_frombytes(polyvec *r, const unsigned char *a) -{ - int i; - for(i=0;ivec[i], a+i*KYBER_POLYBYTES); -} - -/************************************************* -* Name: polyvec_ntt -* -* Description: Apply forward NTT to all elements of a vector of polynomials -* -* Arguments: - polyvec *r: pointer to in/output vector of polynomials -**************************************************/ -void polyvec_ntt(polyvec *r) -{ - int i; - for(i=0;ivec[i]); -} - -/************************************************* -* Name: polyvec_invntt -* -* Description: Apply inverse NTT to all elements of a vector of polynomials -* -* Arguments: - polyvec *r: pointer to in/output vector of polynomials -**************************************************/ -void polyvec_invntt(polyvec *r) -{ - int i; - for(i=0;ivec[i]); -} - -/************************************************* -* Name: polyvec_pointwise_acc -* -* Description: Pointwise multiply elements of a and b and accumulate into r -* -* Arguments: - poly *r: pointer to output polynomial -* - const polyvec *a: pointer to first input vector of polynomials -* - const polyvec *b: pointer to second input vector of polynomials -**************************************************/ -void polyvec_pointwise_acc(poly *r, const polyvec *a, const polyvec *b) -{ - int i; - poly t; - - poly_basemul(r, &a->vec[0], &b->vec[0]); - for(i=1;ivec[i], &b->vec[i]); - poly_add(r, r, &t); - } - - poly_reduce(r); -} - -/************************************************* -* Name: polyvec_reduce -* -* Description: Applies Barrett reduction to each coefficient -* of each element of a vector of polynomials -* for details of the Barrett reduction see comments in reduce.c -* -* Arguments: - poly *r: pointer to input/output polynomial -**************************************************/ -void polyvec_reduce(polyvec *r) -{ - int i; - for(i=0;ivec[i]); -} - -/************************************************* -* Name: polyvec_csubq -* -* Description: Applies conditional subtraction of q to each coefficient -* of each element of a vector of polynomials -* for details of conditional subtraction of q see comments in reduce.c -* -* Arguments: - poly *r: pointer to input/output polynomial -**************************************************/ -void polyvec_csubq(polyvec *r) -{ - int i; - for(i=0;ivec[i]); -} - -/************************************************* -* Name: polyvec_add -* -* Description: Add vectors of polynomials -* -* Arguments: - polyvec *r: pointer to output vector of polynomials -* - const polyvec *a: pointer to first input vector of polynomials -* - const polyvec *b: pointer to second input vector of polynomials -**************************************************/ -void polyvec_add(polyvec *r, const polyvec *a, const polyvec *b) -{ - int i; - for(i=0;ivec[i], &a->vec[i], &b->vec[i]); -} diff --git a/code/jasmin/avx2v/polyvec.h b/code/jasmin/avx2v/polyvec.h deleted file mode 100644 index 9fbdb673..00000000 --- a/code/jasmin/avx2v/polyvec.h +++ /dev/null @@ -1,47 +0,0 @@ -#ifndef POLYVEC_H -#define POLYVEC_H - -#include "params.h" -#include "poly.h" - -typedef struct{ - poly vec[KYBER_K]; -} polyvec; - -void polyvec_compress(unsigned char *r, polyvec *a); -void polyvec_decompress(polyvec *r, const unsigned char *a); - -void polyvec_tobytes(unsigned char *r, polyvec *a); -void polyvec_frombytes(polyvec *r, const unsigned char *a); - -void polyvec_ntt(polyvec *r); -void polyvec_invntt(polyvec *r); - -void polyvec_pointwise_acc(poly *r, const polyvec *a, const polyvec *b); - -void polyvec_reduce(polyvec *r); -void polyvec_csubq(polyvec *r); - -void polyvec_add(polyvec *r, const polyvec *a, const polyvec *b); - - - - -void polyvec_compress_jazz(unsigned char *r, polyvec *a); -void polyvec_decompress_jazz(polyvec *r, const unsigned char *a); - -void polyvec_tobytes_jazz(unsigned char *r, polyvec *a); -void polyvec_frombytes_jazz(polyvec *r, const unsigned char *a); - -void polyvec_ntt_jazz(polyvec *r); -void polyvec_invntt_jazz(polyvec *r); - -void polyvec_pointwise_acc_jazz(poly *r, const polyvec *a, const polyvec *b); - -void polyvec_reduce_jazz(polyvec *r); -void polyvec_csubq_jazz(polyvec *r); - -void polyvec_add2_jazz(polyvec *r, const polyvec *b); - - -#endif diff --git a/code/jasmin/avx2v/polyvec.jinc b/code/jasmin/avx2v/polyvec.jinc deleted file mode 100644 index d68af2ad..00000000 --- a/code/jasmin/avx2v/polyvec.jinc +++ /dev/null @@ -1,241 +0,0 @@ -require "params.jinc" -require "poly.jinc" -require "shuffle.jinc" - -inline -fn __polyvec_add2(stack u16[KYBER_VECN] r, stack u16[KYBER_VECN] b) -> stack u16[KYBER_VECN] -{ - r[0:KYBER_N] = _poly_add2(r[0:KYBER_N], b[0:KYBER_N]); - r[KYBER_N:KYBER_N] = _poly_add2(r[KYBER_N:KYBER_N], b[KYBER_N:KYBER_N]); - r[2*KYBER_N:KYBER_N] = _poly_add2(r[2*KYBER_N:KYBER_N], b[2*KYBER_N:KYBER_N]); - - return r; -} - -inline -fn __polyvec_csubq(stack u16[KYBER_VECN] r) -> stack u16[KYBER_VECN] -{ - r[0:KYBER_N] = _poly_csubq(r[0:KYBER_N]); - r[KYBER_N:KYBER_N] = _poly_csubq(r[KYBER_N:KYBER_N]); - r[2*KYBER_N:KYBER_N] = _poly_csubq(r[2*KYBER_N:KYBER_N]); - - return r; -} - -u32 pvd_q_s = 0x0d013404; -u8[32] pvd_shufbdidx_s = {0, 1, 1, 2, 2, 3, 3, 4, - 5, 6, 6, 7, 7, 8, 8, 9, - 2, 3, 3, 4, 4, 5, 5, 6, - 7, 8, 8, 9, 9, 10, 10, 11}; -u64 pvd_sllvdidx_s = 0x04; -u32 pvd_mask_s = 0x7fe01ff8; - -inline -fn __polyvec_decompress(reg u64 rp) -> stack u16[KYBER_VECN] -{ - inline int i k; - reg u256 f q shufbidx sllvdidx mask; - stack u16[KYBER_VECN] r; - - q = #VPBROADCAST_8u32(pvd_q_s); - shufbidx = pvd_shufbdidx_s[u256 0]; - sllvdidx = #VPBROADCAST_4u64(pvd_sllvdidx_s); - mask = #VPBROADCAST_8u32(pvd_mask_s); - - for k=0 to KYBER_K - { - for i=0 to KYBER_N/16 - { - f = (u256)[rp + 320 * k + 20 * i]; - f = #VPERMQ(f, 0x94); - f = #VPSHUFB_256(f, shufbidx); - f = #VPSLLV_8u32(f, sllvdidx); - f = #VPSRL_16u16(f, 1); - f = #VPAND_256(f, mask); - f = #VPMULHRS_16u16(f, q); - r[u256 16*k + i] = f; - } - } - - return r; -} - -u16 pvc_off_s = 0x0f; -u16 pvc_shift1_s = 0x1000; -u16 pvc_mask_s = 0x03ff; -u64 pvc_shift2_s = 0x0400000104000001; -u64 pvc_sllvdidx_s = 0x0C; -u8[32] pvc_shufbidx_s = {0, 1, 2, 3, 4, 8, 9, 10, 11, 12, -1, -1, -1, -1, -1, -1, - 9, 10, 11, 12, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 8}; - -inline -fn __polyvec_compress(reg u64 rp, stack u16[KYBER_VECN] a) -{ - inline int i; - reg u256 f0 f1 f2 v v8 off shift1 mask shift2 sllvdidx shufbidx; - reg u128 t0 t1; - reg ptr u16[16] x16p; - - a = __polyvec_csubq(a); - - x16p = jvx16; - v = x16p[u256 0]; - v8 = #VPSLL_16u16(v, 3); - off = #VPBROADCAST_16u16(pvc_off_s); - shift1 = #VPBROADCAST_16u16(pvc_shift1_s); - mask = #VPBROADCAST_16u16(pvc_mask_s); - shift2 = #VPBROADCAST_4u64(pvc_shift2_s); - sllvdidx = #VPBROADCAST_4u64(pvc_sllvdidx_s); - shufbidx = pvc_shufbidx_s[u256 0]; - - for i=0 to KYBER_VECN/16 - { - f0 = a[u256 i]; - f1 = #VPMULL_16u16(f0, v8); - f2 = #VPADD_16u16(f0, off); - f0 = #VPSLL_16u16(f0, 3); - f0 = #VPMULH_16u16(f0, v); - f2 = #VPSUB_16u16(f1, f2); - f1 = #VPANDN_256(f1, f2); - f1 = #VPSRL_16u16(f1, 15); - f0 = #VPSUB_16u16(f0, f1); - f0 = #VPMULHRS_16u16(f0, shift1); - f0 = #VPAND_256(f0, mask); - f0 = #VPMADDWD_256(f0, shift2); - f0 = #VPSLLV_8u32(f0, sllvdidx); - f0 = #VPSRL_4u64(f0, 12); - f0 = #VPSHUFB_256(f0, shufbidx); - t0 = (128u)f0; - t1 = #VEXTRACTI128(f0, 1); - t0 = #VPBLEND_8u16(t0, t1, 0xE0); - (u128)[rp + 20*i] = t0; - (u32)[rp + 20*i + 16] = #VPEXTR_32(t1, 0); - } -} - -inline -fn __polyvec_compress_1(reg ptr u8[KYBER_POLYVECCOMPRESSEDBYTES] rp, stack u16[KYBER_VECN] a) -> reg ptr u8[KYBER_POLYVECCOMPRESSEDBYTES] -{ - inline int i; - reg u256 f0 f1 f2 v v8 off shift1 mask shift2 sllvdidx shufbidx; - reg u128 t0 t1; - reg ptr u16[16] x16p; - - a = __polyvec_csubq(a); - - x16p = jvx16; - v = x16p[u256 0]; - v8 = #VPSLL_16u16(v, 3); - off = #VPBROADCAST_16u16(pvc_off_s); - shift1 = #VPBROADCAST_16u16(pvc_shift1_s); - mask = #VPBROADCAST_16u16(pvc_mask_s); - shift2 = #VPBROADCAST_4u64(pvc_shift2_s); - sllvdidx = #VPBROADCAST_4u64(pvc_sllvdidx_s); - shufbidx = pvc_shufbidx_s[u256 0]; - - for i=0 to KYBER_VECN/16 - { - f0 = a[u256 i]; - f1 = #VPMULL_16u16(f0, v8); - f2 = #VPADD_16u16(f0, off); - f0 = #VPSLL_16u16(f0, 3); - f0 = #VPMULH_16u16(f0, v); - f2 = #VPSUB_16u16(f1, f2); - f1 = #VPANDN_256(f1, f2); - f1 = #VPSRL_16u16(f1, 15); - f0 = #VPSUB_16u16(f0, f1); - f0 = #VPMULHRS_16u16(f0, shift1); - f0 = #VPAND_256(f0, mask); - f0 = #VPMADDWD_256(f0, shift2); - f0 = #VPSLLV_8u32(f0, sllvdidx); - f0 = #VPSRL_4u64(f0, 12); - f0 = #VPSHUFB_256(f0, shufbidx); - t0 = (128u)f0; - t1 = #VEXTRACTI128(f0, 1); - t0 = #VPBLEND_8u16(t0, t1, 0xE0); - rp.[u128 20*i] = t0; - rp.[u32 20*i + 16] = #VPEXTR_32(t1, 0); - } - - return rp; -} - -inline -fn __polyvec_frombytes(reg u64 ap) -> stack u16[KYBER_VECN] -{ - stack u16[KYBER_VECN] r; - reg u64 pp; - - pp = ap; - r[0:KYBER_N] = _poly_frombytes(r[0:KYBER_N], pp); - pp += KYBER_POLYBYTES; - r[KYBER_N:KYBER_N] = _poly_frombytes(r[KYBER_N:KYBER_N], pp); - pp += KYBER_POLYBYTES; - r[2*KYBER_N:KYBER_N] = _poly_frombytes(r[2*KYBER_N:KYBER_N], pp); - - return r; -} - - -inline -fn __polyvec_invntt(stack u16[KYBER_VECN] r) -> stack u16[KYBER_VECN] -{ - r[0:KYBER_N] = _poly_invntt(r[0:KYBER_N]); - r[KYBER_N:KYBER_N] = _poly_invntt(r[KYBER_N:KYBER_N]); - r[2*KYBER_N:KYBER_N] = _poly_invntt(r[2*KYBER_N:KYBER_N]); - - return r; -} - - -inline -fn __polyvec_ntt(stack u16[KYBER_VECN] r) -> stack u16[KYBER_VECN] -{ - r[0:KYBER_N] = _poly_ntt(r[0:KYBER_N]); - r[KYBER_N:KYBER_N] = _poly_ntt(r[KYBER_N:KYBER_N]); - r[2*KYBER_N:KYBER_N] = _poly_ntt(r[2*KYBER_N:KYBER_N]); - - return r; -} - - -inline -fn __polyvec_reduce(stack u16[KYBER_VECN] r) -> stack u16[KYBER_VECN] -{ - r[0:KYBER_N] = __poly_reduce(r[0:KYBER_N]); - r[KYBER_N:KYBER_N] = __poly_reduce(r[KYBER_N:KYBER_N]); - r[2*KYBER_N:KYBER_N] = __poly_reduce(r[2*KYBER_N:KYBER_N]); - - return r; -} - - -inline -fn __polyvec_pointwise_acc(stack u16[KYBER_N] r, stack u16[KYBER_VECN] a b) -> stack u16[KYBER_N] -{ - stack u16[KYBER_N] t; - - r = _poly_basemul(r, a[0:KYBER_N], b[0:KYBER_N]); - t = _poly_basemul(t, a[KYBER_N:KYBER_N], b[KYBER_N:KYBER_N]); - r = _poly_add2(r, t); - t = _poly_basemul(t, a[2*KYBER_N:KYBER_N], b[2*KYBER_N:KYBER_N]); - r = _poly_add2(r, t); - - // r = __poly_reduce(r); - - return r; -} - - -inline -fn __polyvec_tobytes(reg u64 rp, stack u16[KYBER_VECN] a) -{ - reg u64 pp; - - pp = rp; - a[0:KYBER_N] = _poly_tobytes(pp, a[0:KYBER_N]); - pp += KYBER_POLYBYTES; - a[KYBER_N:KYBER_N] = _poly_tobytes(pp, a[KYBER_N:KYBER_N]); - pp += KYBER_POLYBYTES; - a[2*KYBER_N:KYBER_N] = _poly_tobytes(pp, a[2*KYBER_N:KYBER_N]); -} diff --git a/code/jasmin/avx2v/reduce.c b/code/jasmin/avx2v/reduce.c deleted file mode 100644 index 39264b09..00000000 --- a/code/jasmin/avx2v/reduce.c +++ /dev/null @@ -1,62 +0,0 @@ -#include -#include "params.h" -#include "reduce.h" - -/************************************************* -* Name: montgomery_reduce -* -* Description: Montgomery reduction; given a 32-bit integer a, computes -* 16-bit integer congruent to a * R^-1 mod q, -* where R=2^16 -* -* Arguments: - int32_t a: input integer to be reduced; has to be in {-q2^15,...,q2^15-1} -* -* Returns: integer in {-q+1,...,q-1} congruent to a * R^-1 modulo q. -**************************************************/ -int16_t montgomery_reduce(int32_t a) -{ - int32_t t; - int16_t u; - -// printf("a: %d\n", a); - u = a * QINV; - t = (int32_t)u * KYBER_Q; - t = a - t; - t >>= 16; - return t; -} - -/************************************************* -* Name: barrett_reduce -* -* Description: Barrett reduction; given a 16-bit integer a, computes -* 16-bit integer congruent to a mod q in {0,...,q} -* -* Arguments: - int16_t a: input integer to be reduced -* -* Returns: integer in {0,...,q} congruent to a modulo q. -**************************************************/ -int16_t barrett_reduce(int16_t a) { - int32_t t; - const int32_t v = (1U << 26)/KYBER_Q + 1; - - t = v*a; - t >>= 26; - t *= KYBER_Q; - return a - t; -} - -/************************************************* -* Name: csubq -* -* Description: Conditionallly subtract q -* -* Arguments: - int16_t x: input integer -* -* Returns: a - q if a >= q, else a -**************************************************/ -int16_t csubq(int16_t a) { - a -= KYBER_Q; - a += (a >> 15) & KYBER_Q; - return a; -} diff --git a/code/jasmin/avx2v/reduce.h b/code/jasmin/avx2v/reduce.h deleted file mode 100644 index 59ee6ef4..00000000 --- a/code/jasmin/avx2v/reduce.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef REDUCE_H -#define REDUCE_H - -#include - -#define MONT 2285 // 2^16 % Q -#define QINV 62209 // q^(-1) mod 2^16 - -int16_t montgomery_reduce(int32_t a); - -int16_t barrett_reduce(int16_t a); - -int16_t csubq(int16_t x); - -#endif diff --git a/code/jasmin/avx2v/reduce.jinc b/code/jasmin/avx2v/reduce.jinc deleted file mode 100644 index f4070e6b..00000000 --- a/code/jasmin/avx2v/reduce.jinc +++ /dev/null @@ -1,95 +0,0 @@ -require "params.jinc" - -param int QINV = 62209; /* q^(-1) mod 2^16 */ -param int MONT = 2285; /* 2^16 % Q */ -param int BARR = 20159; /* (1U << 26)/KYBER_Q + 1 */ - -inline -fn __csubq(reg u256 r qx16) -> reg u256 -{ - reg u256 t; - r = #VPSUB_16u16(r, qx16); - t = #VPSRA_16u16(r, 15); - t = #VPAND_256(t, qx16); - r = #VPADD_16u16(t, r); - return r; -} - -inline -fn __red16x(reg u256 r qx16 vx16) -> reg u256 -{ - reg u256 x; - x = #VPMULH_16u16(r, vx16); - x = #VPSRA_16u16(x, 10); - x = #VPMULL_16u16(x, qx16); - r = #VPSUB_16u16(r, x); - return r; -} - -inline -fn __fqmulprecomp16x(reg u256 b al ah qx16) -> reg u256 -{ - reg u256 x; - x = #VPMULL_16u16(al, b); - b = #VPMULH_16u16(ah, b); - x = #VPMULH_16u16(x, qx16); - b = #VPSUB_16u16(b, x); - return b; -} - -inline -fn __fqmulx16(reg u256 a b qx16 qinvx16) -> reg u256 -{ - reg u256 rd rhi rlo; - rhi = #VPMULH_16u16(a, b); - rlo = #VPMULL_16u16(a, b); - - rlo = #VPMULL_16u16(rlo, qinvx16); - rlo = #VPMULH_16u16(rlo, qx16); - rd = #VPSUB_16u16(rhi, rlo); - - return rd; -} - -inline -fn __fqmul(reg u16 a, reg u16 b) -> reg u16 -{ - reg u32 ad; - reg u32 bd; - reg u32 c; - reg u32 t; - reg u16 r; - reg u32 u; - - ad = (32s)a; - bd = (32s)b; - - c = ad * bd; - - u = c * QINV; - u <<= 16; - //u = #SAR_32(u, 16); - u >>s= 16; - t = u * KYBER_Q; - t = c - t; - //t = #SAR_32(t, 16); - t >>s= 16; - r = t; - return r; -} - -inline -fn __barrett_reduce(reg u16 a) -> reg u16 -{ - reg u32 t; - reg u16 r; - t = (32s)a; - t = t * BARR; - //t = #SAR_32(t, 26); - t >>s= 26; - t *= KYBER_Q; - r = t; - r = a; - r -= t; - return r; -} diff --git a/code/jasmin/avx2v/shuffle.S b/code/jasmin/avx2v/shuffle.S deleted file mode 100644 index 46b676a1..00000000 --- a/code/jasmin/avx2v/shuffle.S +++ /dev/null @@ -1,261 +0,0 @@ -#include "consts.h" -.include "fq.inc" -.include "shuffle.inc" - -nttpack128_avx: -#load -vmovdqa (%rdi),%ymm4 -vmovdqa 32(%rdi),%ymm5 -vmovdqa 64(%rdi),%ymm6 -vmovdqa 96(%rdi),%ymm7 -vmovdqa 128(%rdi),%ymm8 -vmovdqa 160(%rdi),%ymm9 -vmovdqa 192(%rdi),%ymm10 -vmovdqa 224(%rdi),%ymm11 - -shuffle1 4,5,3,5 -shuffle1 6,7,4,7 -shuffle1 8,9,6,9 -shuffle1 10,11,8,11 - -shuffle2 3,4,10,4 -shuffle2 6,8,3,8 -shuffle2 5,7,6,7 -shuffle2 9,11,5,11 - -shuffle4 10,3,9,3 -shuffle4 6,5,10,5 -shuffle4 4,8,6,8 -shuffle4 7,11,4,11 - -shuffle8 9,10,7,10 -shuffle8 6,4,9,4 -shuffle8 3,5,6,5 -shuffle8 8,11,3,11 - -#store -vmovdqa %ymm7,(%rdi) -vmovdqa %ymm9,32(%rdi) -vmovdqa %ymm6,64(%rdi) -vmovdqa %ymm3,96(%rdi) -vmovdqa %ymm10,128(%rdi) -vmovdqa %ymm4,160(%rdi) -vmovdqa %ymm5,192(%rdi) -vmovdqa %ymm11,224(%rdi) - -ret - -.text -nttunpack128_avx: -#load -vmovdqa (%rdi),%ymm4 -vmovdqa 32(%rdi),%ymm5 -vmovdqa 64(%rdi),%ymm6 -vmovdqa 96(%rdi),%ymm7 -vmovdqa 128(%rdi),%ymm8 -vmovdqa 160(%rdi),%ymm9 -vmovdqa 192(%rdi),%ymm10 -vmovdqa 224(%rdi),%ymm11 - -shuffle8 4,8,3,8 -shuffle8 5,9,4,9 -shuffle8 6,10,5,10 -shuffle8 7,11,6,11 - -shuffle4 3,5,7,5 -shuffle4 8,10,3,10 -shuffle4 4,6,8,6 -shuffle4 9,11,4,11 - -shuffle2 7,8,9,8 -shuffle2 5,6,7,6 -shuffle2 3,4,5,4 -shuffle2 10,11,3,11 - -shuffle1 9,5,10,5 -shuffle1 8,4,9,4 -shuffle1 7,3,8,3 -shuffle1 6,11,7,11 - -#store -vmovdqa %ymm10,(%rdi) -vmovdqa %ymm5,32(%rdi) -vmovdqa %ymm9,64(%rdi) -vmovdqa %ymm4,96(%rdi) -vmovdqa %ymm8,128(%rdi) -vmovdqa %ymm3,160(%rdi) -vmovdqa %ymm7,192(%rdi) -vmovdqa %ymm11,224(%rdi) - -ret - -.global cdecl(nttunpack_avx) -cdecl(nttunpack_avx): -call nttunpack128_avx -add $256,%rdi -call nttunpack128_avx -ret - -.global cdecl(nttpack_avx) -cdecl(nttpack_avx): -call nttpack128_avx -add $256,%rdi -call nttpack128_avx -ret - - -ntttobytes128_avx: -#load -vmovdqa (%rsi),%ymm5 -vmovdqa 32(%rsi),%ymm6 -vmovdqa 64(%rsi),%ymm7 -vmovdqa 96(%rsi),%ymm8 -vmovdqa 128(%rsi),%ymm9 -vmovdqa 160(%rsi),%ymm10 -vmovdqa 192(%rsi),%ymm11 -vmovdqa 224(%rsi),%ymm12 - -#csubq -csubq 5,13 -csubq 6,14 -csubq 7,15 -csubq 8,1 -csubq 9,13 -csubq 10,14 -csubq 11,15 -csubq 12,1 - -#bitpack -vpsllw $12,%ymm6,%ymm4 -vpor %ymm4,%ymm5,%ymm4 - -vpsrlw $4,%ymm6,%ymm5 -vpsllw $8,%ymm7,%ymm6 -vpor %ymm5,%ymm6,%ymm5 - -vpsrlw $8,%ymm7,%ymm6 -vpsllw $4,%ymm8,%ymm7 -vpor %ymm6,%ymm7,%ymm6 - -vpsllw $12,%ymm10,%ymm7 -vpor %ymm7,%ymm9,%ymm7 - -vpsrlw $4,%ymm10,%ymm8 -vpsllw $8,%ymm11,%ymm9 -vpor %ymm8,%ymm9,%ymm8 - -vpsrlw $8,%ymm11,%ymm9 -vpsllw $4,%ymm12,%ymm10 -vpor %ymm9,%ymm10,%ymm9 - -shuffle1 4,5,3,5 -shuffle1 6,7,4,7 -shuffle1 8,9,6,9 - -shuffle2 3,4,8,4 -shuffle2 6,5,3,5 -shuffle2 7,9,6,9 - -shuffle4 8,3,7,3 -shuffle4 6,4,8,4 -shuffle4 5,9,6,9 - -shuffle8 7,8,5,8 -shuffle8 6,3,7,3 -shuffle8 4,9,6,9 - -#store -vmovdqu %ymm5,(%rdi) -vmovdqu %ymm7,32(%rdi) -vmovdqu %ymm6,64(%rdi) -vmovdqu %ymm8,96(%rdi) -vmovdqu %ymm3,128(%rdi) -vmovdqu %ymm9,160(%rdi) - -ret - -.global cdecl(ntttobytes_avx) -cdecl(ntttobytes_avx): -#consts -vmovdqa _16XQ*2(%rdx),%ymm0 -call ntttobytes128_avx -add $256,%rsi -add $192,%rdi -call ntttobytes128_avx -ret - -nttfrombytes128_avx: -#load -vmovdqu (%rsi),%ymm4 -vmovdqu 32(%rsi),%ymm5 -vmovdqu 64(%rsi),%ymm6 -vmovdqu 96(%rsi),%ymm7 -vmovdqu 128(%rsi),%ymm8 -vmovdqu 160(%rsi),%ymm9 - -shuffle8 4,7,3,7 -shuffle8 5,8,4,8 -shuffle8 6,9,5,9 - -shuffle4 3,8,6,8 -shuffle4 7,5,3,5 -shuffle4 4,9,7,9 - -shuffle2 6,5,4,5 -shuffle2 8,7,6,7 -shuffle2 3,9,8,9 - -shuffle1 4,7,10,7 -shuffle1 5,8,4,8 -shuffle1 6,9,5,9 - -#bitunpack -vpsrlw $12,%ymm10,%ymm11 -vpsllw $4,%ymm7,%ymm12 -vpor %ymm11,%ymm12,%ymm11 -vpand %ymm0,%ymm10,%ymm10 -vpand %ymm0,%ymm11,%ymm11 - -vpsrlw $8,%ymm7,%ymm12 -vpsllw $8,%ymm4,%ymm13 -vpor %ymm12,%ymm13,%ymm12 -vpand %ymm0,%ymm12,%ymm12 - -vpsrlw $4,%ymm4,%ymm13 -vpand %ymm0,%ymm13,%ymm13 - -vpsrlw $12,%ymm8,%ymm14 -vpsllw $4,%ymm5,%ymm15 -vpor %ymm14,%ymm15,%ymm14 -vpand %ymm0,%ymm8,%ymm8 -vpand %ymm0,%ymm14,%ymm14 - -vpsrlw $8,%ymm5,%ymm15 -vpsllw $8,%ymm9,%ymm1 -vpor %ymm15,%ymm1,%ymm15 -vpand %ymm0,%ymm15,%ymm15 - -vpsrlw $4,%ymm9,%ymm1 -vpand %ymm0,%ymm1,%ymm1 - -#store -vmovdqa %ymm10,(%rdi) -vmovdqa %ymm11,32(%rdi) -vmovdqa %ymm12,64(%rdi) -vmovdqa %ymm13,96(%rdi) -vmovdqa %ymm8,128(%rdi) -vmovdqa %ymm14,160(%rdi) -vmovdqa %ymm15,192(%rdi) -vmovdqa %ymm1,224(%rdi) - -ret - -.global cdecl(nttfrombytes_avx) -cdecl(nttfrombytes_avx): -#consts -vmovdqa _16XMASK*2(%rdx),%ymm0 -call nttfrombytes128_avx -add $256,%rdi -add $192,%rsi -call nttfrombytes128_avx -ret diff --git a/code/jasmin/avx2v/shuffle.inc b/code/jasmin/avx2v/shuffle.inc deleted file mode 100644 index df352030..00000000 --- a/code/jasmin/avx2v/shuffle.inc +++ /dev/null @@ -1,23 +0,0 @@ -.macro shuffle8 r0,r1,r2,r3 -vperm2i128 $0x20,%ymm\r1,%ymm\r0,%ymm\r2 -vperm2i128 $0x31,%ymm\r1,%ymm\r0,%ymm\r3 -.endm - -.macro shuffle4 r0,r1,r2,r3 -vpunpcklqdq %ymm\r1,%ymm\r0,%ymm\r2 -vpunpckhqdq %ymm\r1,%ymm\r0,%ymm\r3 -.endm - -.macro shuffle2 r0,r1,r2,r3 -vpsllq $32,%ymm\r1,%ymm12 -vpsrlq $32,%ymm\r0,%ymm13 -vpblendd $0xAA,%ymm12,%ymm\r0,%ymm\r2 -vpblendd $0xAA,%ymm\r1,%ymm13,%ymm\r3 -.endm - -.macro shuffle1 r0,r1,r2,r3 -vpslld $16,%ymm\r1,%ymm12 -vpsrld $16,%ymm\r0,%ymm13 -vpblendw $0xAA,%ymm12,%ymm\r0,%ymm\r2 -vpblendw $0xAA,%ymm\r1,%ymm13,%ymm\r3 -.endm diff --git a/code/jasmin/avx2v/shuffle.jinc b/code/jasmin/avx2v/shuffle.jinc deleted file mode 100644 index a187591f..00000000 --- a/code/jasmin/avx2v/shuffle.jinc +++ /dev/null @@ -1,192 +0,0 @@ -inline -fn __shuffle8(reg u256 a b) -> reg u256, reg u256 -{ - reg u256 r0 r1; - r0 = #VPERM2I128(a,b,0x20); - r1 = #VPERM2I128(a,b,0x31); - return r0, r1; -} - -inline -fn __shuffle4(reg u256 a b) -> reg u256, reg u256 -{ - reg u256 r0 r1; - r0 = #VPUNPCKL_4u64(a,b); - r1 = #VPUNPCKH_4u64(a,b); - return r0, r1; -} - -inline -fn __shuffle2(reg u256 a b) -> reg u256, reg u256 -{ - reg u256 t0 t1; - t0 = #VMOVSLDUP_8u32(b); - t0 = #VPBLEND_8u32(a, t0, 0xAA); - a = #VPSRL_4u64(a,32); - t1 = #VPBLEND_8u32(a, b, 0xAA); - return t0, t1; -} - - -inline -fn __shuffle1(reg u256 a b) -> reg u256, reg u256 -{ - reg u256 r0 r1 t0 t1; - t0 = #VPSLL_8u32(b,16); - r0 = #VPBLEND_16u16(a,t0,0xAA); - t1 = #VPSRL_8u32(a,16); - r1 = #VPBLEND_16u16(t1,b,0xAA); - return r0, r1; -} - - -// Transform from AVX order to bitreversed order -inline -fn __nttpack128(reg u256 r0 r1 r2 r3 r4 r5 r6 r7) - -> reg u256, reg u256, reg u256, reg u256, reg u256, reg u256, reg u256, reg u256 -{ - r0, r1 = __shuffle1(r0, r1); - r2, r3 = __shuffle1(r2, r3); - r4, r5 = __shuffle1(r4, r5); - r6, r7 = __shuffle1(r6, r7); - - r0, r2 = __shuffle2(r0, r2); - r4, r6 = __shuffle2(r4, r6); - r1, r3 = __shuffle2(r1, r3); - r5, r7 = __shuffle2(r5, r7); - - r0, r4 = __shuffle4(r0, r4); - r1, r5 = __shuffle4(r1, r5); - r2, r6 = __shuffle4(r2, r6); - r3, r7 = __shuffle4(r3, r7); - - r0, r1 = __shuffle8(r0, r1); - r2, r3 = __shuffle8(r2, r3); - r4, r5 = __shuffle8(r4, r5); - r6, r7 = __shuffle8(r6, r7); - - return r0, r2, r4, r6, r1, r3, r5, r7; -} - - -// Transform from bitreversed order to AVX order -inline -fn __nttunpack128(reg u256 r0 r1 r2 r3 r4 r5 r6 r7) - -> reg u256, reg u256, reg u256, reg u256, reg u256, reg u256, reg u256, reg u256 -{ - r0, r4 = __shuffle8(r0, r4); - r1, r5 = __shuffle8(r1, r5); - r2, r6 = __shuffle8(r2, r6); - r3, r7 = __shuffle8(r3, r7); - - r0, r2 = __shuffle4(r0, r2); - r4, r6 = __shuffle4(r4, r6); - r1, r3 = __shuffle4(r1, r3); - r5, r7 = __shuffle4(r5, r7); - - r0, r1 = __shuffle2(r0, r1); - r2, r3 = __shuffle2(r2, r3); - r4, r5 = __shuffle2(r4, r5); - r6, r7 = __shuffle2(r6, r7); - - r0, r4 = __shuffle1(r0, r4); - r1, r5 = __shuffle1(r1, r5); - r2, r6 = __shuffle1(r2, r6); - r3, r7 = __shuffle1(r3, r7); - - return r0, r4, r1, r5, r2, r6, r3, r7; -} - -fn _nttpack(reg ptr u16[KYBER_N] rp) -> reg ptr u16[KYBER_N] -{ - reg u256 r0 r1 r2 r3 r4 r5 r6 r7; - - r0 = rp.[u256 32*0]; - r1 = rp.[u256 32*1]; - r2 = rp.[u256 32*2]; - r3 = rp.[u256 32*3]; - r4 = rp.[u256 32*4]; - r5 = rp.[u256 32*5]; - r6 = rp.[u256 32*6]; - r7 = rp.[u256 32*7]; - - r0, r1, r2, r3, r4, r5, r6, r7 = __nttpack128(r0, r1, r2, r3, r4, r5, r6, r7); - - rp.[u256 32*0] = r0; - rp.[u256 32*1] = r1; - rp.[u256 32*2] = r2; - rp.[u256 32*3] = r3; - rp.[u256 32*4] = r4; - rp.[u256 32*5] = r5; - rp.[u256 32*6] = r6; - rp.[u256 32*7] = r7; - - r0 = rp.[u256 32*8]; - r1 = rp.[u256 32*9]; - r2 = rp.[u256 32*10]; - r3 = rp.[u256 32*11]; - r4 = rp.[u256 32*12]; - r5 = rp.[u256 32*13]; - r6 = rp.[u256 32*14]; - r7 = rp.[u256 32*15]; - - r0, r1, r2, r3, r4, r5, r6, r7 = __nttpack128(r0, r1, r2, r3, r4, r5, r6, r7); - - rp.[u256 32*8] = r0; - rp.[u256 32*9] = r1; - rp.[u256 32*10] = r2; - rp.[u256 32*11] = r3; - rp.[u256 32*12] = r4; - rp.[u256 32*13] = r5; - rp.[u256 32*14] = r6; - rp.[u256 32*15] = r7; - - return rp; -} - -fn _nttunpack(reg ptr u16[KYBER_N] rp) -> reg ptr u16[KYBER_N] -{ - reg u256 r0 r1 r2 r3 r4 r5 r6 r7; - - r0 = rp.[u256 32*0]; - r1 = rp.[u256 32*1]; - r2 = rp.[u256 32*2]; - r3 = rp.[u256 32*3]; - r4 = rp.[u256 32*4]; - r5 = rp.[u256 32*5]; - r6 = rp.[u256 32*6]; - r7 = rp.[u256 32*7]; - - r0, r1, r2, r3, r4, r5, r6, r7 = __nttunpack128(r0, r1, r2, r3, r4, r5, r6, r7); - - rp.[u256 32*0] = r0; - rp.[u256 32*1] = r1; - rp.[u256 32*2] = r2; - rp.[u256 32*3] = r3; - rp.[u256 32*4] = r4; - rp.[u256 32*5] = r5; - rp.[u256 32*6] = r6; - rp.[u256 32*7] = r7; - - r0 = rp.[u256 32*8]; - r1 = rp.[u256 32*9]; - r2 = rp.[u256 32*10]; - r3 = rp.[u256 32*11]; - r4 = rp.[u256 32*12]; - r5 = rp.[u256 32*13]; - r6 = rp.[u256 32*14]; - r7 = rp.[u256 32*15]; - - r0, r1, r2, r3, r4, r5, r6, r7 = __nttunpack128(r0, r1, r2, r3, r4, r5, r6, r7); - - rp.[u256 32*8] = r0; - rp.[u256 32*9] = r1; - rp.[u256 32*10] = r2; - rp.[u256 32*11] = r3; - rp.[u256 32*12] = r4; - rp.[u256 32*13] = r5; - rp.[u256 32*14] = r6; - rp.[u256 32*15] = r7; - - return rp; -} diff --git a/code/jasmin/avx2v/speed.h b/code/jasmin/avx2v/speed.h deleted file mode 100644 index b4b917c5..00000000 --- a/code/jasmin/avx2v/speed.h +++ /dev/null @@ -1,62 +0,0 @@ -#ifndef SPEED_H -#define SPEED_H - -#include -#include "params.h" - -typedef struct{ - int16_t __attribute__((aligned(32))) coeffs[KYBER_N]; -} poly; - -typedef struct{ - poly vec[KYBER_K]; -} polyvec; - -void gen_matrix_jazz(polyvec *a, unsigned char *seed); - -/*Poly functions*/ -void poly_compress_jazz(unsigned char *r, poly *a); -void poly_decompress_jazz(poly *r, const unsigned char *a); - -void poly_frommsg_jazz(poly *r, const unsigned char msg[KYBER_SYMBYTES]); -void poly_tomsg_jazz(unsigned char msg[KYBER_SYMBYTES], poly *r); - -void poly_getnoise_jazz(poly *r,const unsigned char *seed, unsigned char nonce); -void poly_getnoise_4x_jazz(poly *r0, poly *r1, poly *r2, poly *r3,const unsigned char *seed, unsigned char nonce); - -void poly_ntt_jazz(poly *r); -void poly_invntt_jazz(poly *r); - -/*Polyvec functions*/ -void polyvec_compress_jazz(unsigned char *r, polyvec *a); -void polyvec_decompress_jazz(polyvec *r, const unsigned char *a); - -void polyvec_pointwise_acc_jazz(poly *r, const polyvec *a, const polyvec *b); - -/* Indcpa functions*/ -void indcpa_keypair_jazz(unsigned char *pk, - unsigned char *sk, - const unsigned char *randomness); - -void indcpa_enc_jazz(unsigned char *c, - const unsigned char *m, - const unsigned char *pk, - const unsigned char *coins); - -void indcpa_dec_jazz(unsigned char *m, - const unsigned char *c, - const unsigned char *sk); - -/* KEM functions */ -void crypto_kem_keypair_jazz(unsigned char *pk, - unsigned char *sk, - const unsigned char *randomness); - -void crypto_kem_enc_jazz(unsigned char *c, - const unsigned char *m, - const unsigned char *pk, - const unsigned char *coins); -void crypto_kem_dec_jazz(unsigned char *m, - const unsigned char *c, - const unsigned char *sk); -#endif diff --git a/code/jasmin/avx2v/symmetric-fips202.c b/code/jasmin/avx2v/symmetric-fips202.c deleted file mode 100644 index cf159db3..00000000 --- a/code/jasmin/avx2v/symmetric-fips202.c +++ /dev/null @@ -1,64 +0,0 @@ -#include -#include "symmetric.h" -#include "fips202.h" - -/************************************************* -* Name: kyber_shake128_absorb -* -* Description: Absorb step of the SHAKE128 specialized for the Kyber context. -* -* Arguments: - uint64_t *s: pointer to (uninitialized) output Keccak state -* - const unsigned char *input: pointer to KYBER_SYMBYTES input to be absorbed into s -* - unsigned char i additional byte of input -* - unsigned char j additional byte of input -**************************************************/ -void kyber_shake128_absorb(keccak_state *s, const unsigned char *input, unsigned char x, unsigned char y) -{ - unsigned char extseed[KYBER_SYMBYTES+2]; - int i; - - for(i=0;is, extseed, KYBER_SYMBYTES+2); -} - -/************************************************* -* Name: kyber_shake128_squeezeblocks -* -* Description: Squeeze step of SHAKE128 XOF. Squeezes full blocks of SHAKE128_RATE bytes each. -* Modifies the state. Can be called multiple times to keep squeezing, -* i.e., is incremental. -* -* Arguments: - unsigned char *output: pointer to output blocks -* - unsigned long long nblocks: number of blocks to be squeezed (written to output) -* - keccak_state *s: pointer to in/output Keccak state -**************************************************/ -void kyber_shake128_squeezeblocks(unsigned char *output, unsigned long long nblocks, keccak_state *s) -{ - shake128_squeezeblocks(output, nblocks, s->s); -} - -/************************************************* -* Name: shake256_prf -* -* Description: Usage of SHAKE256 as a PRF, concatenates secret and public input -* and then generates outlen bytes of SHAKE256 output -* -* Arguments: - unsigned char *output: pointer to output -* - unsigned long long outlen: number of requested output bytes -* - const unsigned char * key: pointer to the key (of length KYBER_SYMBYTES) -* - const unsigned char nonce: single-byte nonce (public PRF input) -**************************************************/ -void shake256_prf(unsigned char *output, unsigned long long outlen, const unsigned char *key, const unsigned char nonce) -{ - unsigned char extkey[KYBER_SYMBYTES+1]; - size_t i; - - for(i=0;i -#include -#include -#include - -#include "../params.h" -#include "../ntt.h" -#include "../indcpa.h" - -#define NRUNS 100 - -static inline uint64_t cpucycles(void) { - uint64_t result; - - asm volatile("rdtsc; shlq $32,%%rdx; orq %%rdx,%%rax" - : "=a" (result) : : "%rdx"); - - return result; -} - -static int cmp_uint64(const void *a, const void *b) { - if(*(uint64_t *)a < *(uint64_t *)b) return -1; - if(*(uint64_t *)a > *(uint64_t *)b) return 1; - return 0; -} - -static uint64_t median(uint64_t *l, size_t llen) { - qsort(l,llen,sizeof(uint64_t),cmp_uint64); - - if(llen%2) return l[llen/2]; - else return (l[llen/2-1]+l[llen/2])/2; -} - -static uint64_t average(uint64_t *t, size_t tlen) { - size_t i; - uint64_t acc=0; - - for(i=0;i -#include -#include -#include - -#include "../params.h" -#include "../speed.h" - -#define NRUNS 1000 - -static inline uint64_t cpucycles(void) { - uint64_t result; - - asm volatile("rdtsc; shlq $32,%%rdx; orq %%rdx,%%rax" - : "=a" (result) : : "%rdx"); - - return result; -} - -static int cmp_uint64(const void *a, const void *b) { - if(*(uint64_t *)a < *(uint64_t *)b) return -1; - if(*(uint64_t *)a > *(uint64_t *)b) return 1; - return 0; -} - -static uint64_t median(uint64_t *l, size_t llen) { - qsort(l,llen,sizeof(uint64_t),cmp_uint64); - - if(llen%2) return l[llen/2]; - else return (l[llen/2-1]+l[llen/2])/2; -} - -static uint64_t average(uint64_t *t, size_t tlen) { - size_t i; - uint64_t acc=0; - - for(i=0;i -#include "../fips202.h" - -#define MAXINLEN 33 -#define MAXOUTLEN 168 - -int main(void) -{ - unsigned char in[MAXINLEN]; - unsigned char out0[MAXOUTLEN]; - unsigned char out1[MAXOUTLEN]; - uint64_t state0[25]; - uint64_t state1[25]; - int k; - - FILE *urandom = fopen("/dev/urandom", "r"); - fread(in, 1, sizeof(in), urandom); - - shake256(out0, 128, in, 33); - shake256_128_33_jazz(out1, in); - - for(k=0;k<128;k++) - if(out0[k] != out1[k]) printf("error shake256 at %d: %d %d\n", k, out0[k], out1[k]); - - sha3_512(out0, in, 32); - sha3_512_32_jazz(out1, in); - - for(k=0;k<64;k++) - if(out0[k] != out1[k]) printf("error sha3512 at %d: %d %d\n", k, out0[k], out1[k]); - - shake128_absorb(state0, in, 34); - shake128_absorb34_jazz(state1, in); - - for(k=0;k<25;k++) - if(state0[k] != state1[k]) printf("error shake128_absorb at %d: %lu %lu\n", k, state0[k], state1[k]); - - shake128_squeezeblocks(out0, 1, state0); - shake128_squeezeblock_jazz(out1, state1); - - for(k=0;k<25;k++) - if(state0[k] != state1[k]) printf("error shake128_squeezeblock (state) at %d: %lu %lu\n", k, state0[k], state1[k]); - - for(k=0;k - -#include "../params.h" -#include "../ntt.h" -#include "../indcpa.h" - -int main(void) -{ - unsigned char sk0[KYBER_INDCPA_SECRETKEYBYTES]; - unsigned char sk1[KYBER_INDCPA_SECRETKEYBYTES]; - unsigned char pk0[KYBER_INDCPA_PUBLICKEYBYTES]; - unsigned char pk1[KYBER_INDCPA_PUBLICKEYBYTES]; - unsigned char ct0[KYBER_INDCPA_BYTES]; - unsigned char ct1[KYBER_INDCPA_BYTES]; - - unsigned char randomness0[KYBER_SYMBYTES]; - unsigned char randomness1[KYBER_SYMBYTES]; - unsigned char message[KYBER_INDCPA_MSGBYTES]; - - /* - unsigned char outmsg0[KYBER_INDCPA_MSGBYTES]; - unsigned char outmsg1[KYBER_INDCPA_MSGBYTES]; - */ - unsigned char outmsg0[KYBER_POLYVECBYTES]; - unsigned char outmsg1[KYBER_POLYVECBYTES]; - - FILE *urandom = fopen("/dev/urandom", "r"); - fread(randomness0, KYBER_SYMBYTES, 1, urandom); - fread(randomness1, KYBER_SYMBYTES, 1, urandom); - fread(message, KYBER_SYMBYTES, 1, urandom); - fclose(urandom); - - /* TEST KEYPAIR */ - indcpa_keypair_jazz(pk1, sk1, randomness0); - indcpa_keypair(pk0, sk0, randomness0); - - for(int i=0;i -#include - -#include "../params.h" -#include "../ntt.h" -#include "../kem.h" - -int main(void) -{ - unsigned char sk0[KYBER_SECRETKEYBYTES]; - unsigned char sk1[KYBER_SECRETKEYBYTES]; - unsigned char pk0[KYBER_PUBLICKEYBYTES]; - unsigned char pk1[KYBER_PUBLICKEYBYTES]; - unsigned char ct0[KYBER_CIPHERTEXTBYTES]; - unsigned char ct1[KYBER_CIPHERTEXTBYTES]; - unsigned char shk0[KYBER_SSBYTES]; - unsigned char shk1[KYBER_SSBYTES]; - - unsigned char randomness0[2*KYBER_SYMBYTES]; - unsigned char randomness1[2*KYBER_SYMBYTES]; - - FILE *urandom = fopen("/dev/urandom", "r"); - fread(randomness0, 2*KYBER_SYMBYTES, 1, urandom); - fread(randomness1, 2*KYBER_SYMBYTES, 1, urandom); - fclose(urandom); - - /* TEST KEYPAIR */ - jade_kem_kyber_kyber768_amd64_avx2v_keypair_derand(pk1, sk1, randomness0); - crypto_kem_keypair(pk0, sk0, randomness0); - - for(int i=0;i -#include "../poly.h" - -void poly_setrandom(poly *r) -{ - FILE *urandom = fopen("/dev/urandom", "r"); - fread(r->coeffs, sizeof(int16_t), KYBER_N, urandom); - for(int i=0;icoeffs[i] %= KYBER_Q; - fclose(urandom); -} - -int main(void) -{ - poly a, b, r0; - - poly_setrandom(&a); - poly_setrandom(&b); - - poly_add(&r0, &a, &b); - - poly_add2_jazz(&a, &b); - - for(int i=0;i -#include "../poly.h" -#include "../ntt.h" - -void poly_setrandom(poly *r) -{ - FILE *urandom = fopen("/dev/urandom", "r"); - fread(r->coeffs, sizeof(int16_t), KYBER_N, urandom); - for(int i=0;icoeffs[i] %= KYBER_Q; - fclose(urandom); -} - -int main(void) -{ - poly a, b, r0, r1; - - poly_setrandom(&a); - poly_setrandom(&b); - - poly_basemul(&r0, &a, &b); - - poly_basemul_jazz(&r1, &a, &b); - - for(int i=0;i -#include "../poly.h" -#include "../ntt.h" - -void poly_setrandom(poly *r) -{ - FILE *urandom = fopen("/dev/urandom", "r"); - fread(r->coeffs, sizeof(int16_t), KYBER_N, urandom); - fclose(urandom); - poly_reduce(r); -} - -int main(void) -{ - unsigned char out0[128]; - unsigned char out1[128]; - poly a; - - poly_setrandom(&a); - - poly_compress(out0, &a); - poly_compress_jazz(out1, &a); - - for(int i=0;i<128;i++) - { - if(out0[i] != out1[i]) - printf("error compress %d, %d, %d\n", i, out0[i], out1[i]); - } - - return 0; -} diff --git a/code/jasmin/avx2v/test/test_poly_csubq.c b/code/jasmin/avx2v/test/test_poly_csubq.c deleted file mode 100644 index 87f28e08..00000000 --- a/code/jasmin/avx2v/test/test_poly_csubq.c +++ /dev/null @@ -1,32 +0,0 @@ -#include -#include "../poly.h" -#include "../ntt.h" - -void poly_setrandom(poly *r) -{ - FILE *urandom = fopen("/dev/urandom", "r"); - fread(r->coeffs, sizeof(int16_t), KYBER_N, urandom); - fclose(urandom); - poly_reduce(r); -} - -int main(void) -{ - poly r0, r1; - - poly_setrandom(&r0); - - for(int i=0;i -#include "../poly.h" -#include "../ntt.h" - -int main(void) -{ - unsigned char in[KYBER_POLYCOMPRESSEDBYTES]; - poly r0, r1; - - FILE *urandom = fopen("/dev/urandom", "r"); - fread(in, 1, KYBER_POLYCOMPRESSEDBYTES, urandom); - fclose(urandom); - - poly_decompress(&r0, in); - poly_decompress_jazz(&r1, in); - - for(int i=0;i -#include "../poly.h" -#include "../ntt.h" - -int main(void) -{ - unsigned char in[KYBER_POLYBYTES]; - poly r0, r1; - - FILE *urandom = fopen("/dev/urandom", "r"); - fread(in, 1, KYBER_POLYBYTES, urandom); - fclose(urandom); - - poly_frombytes(&r0, in); - poly_frombytes_jazz(&r1, in); - - for(int i=0;i -#include "../poly.h" -#include "../ntt.h" - -void poly_setrandom(poly *r) -{ - FILE *urandom = fopen("/dev/urandom", "r"); - fread(r->coeffs, sizeof(int16_t), KYBER_N, urandom); - for(int i=0;icoeffs[i] %= KYBER_Q; - } - fclose(urandom); -} - -int main(void) -{ - poly r0, r1; - - poly_setrandom(&r0); - - - for(int i=0;i -#include "../poly.h" -#include "../ntt.h" - -int main(void) -{ - unsigned char in[32]; - poly r0, r1; - - FILE *urandom = fopen("/dev/urandom", "r"); - fread(in, 1, 32, urandom); - fclose(urandom); - - poly_frommsg(&r0, in); - poly_frommsg_jazz(&r1, in); - - for(int i=0;i -#include "../poly.h" -#include "../ntt.h" -#include "../params.h" - - -int main(void) -{ - poly r0[4], r1[4]; - unsigned char seed[KYBER_SYMBYTES]; - - FILE *urandom = fopen("/dev/urandom", "r"); - fread(seed, 1, KYBER_SYMBYTES, urandom); - fclose(urandom); - - poly_getnoise_eta1(r0, seed, 0); - poly_getnoise_eta1(&r0[1], seed, 1); - poly_getnoise_eta1(&r0[2], seed, 2); - poly_getnoise_eta1(&r0[3], seed, 3); - poly_getnoise_eta1_4x_jazz(r1, seed, 0); - - for(int i=0;i<4;i++) - { - for(int j=0;j -#include "../poly.h" -#include "../ntt.h" - -void poly_setrandom(poly *r) -{ - FILE *urandom = fopen("/dev/urandom", "r"); - fread(r->coeffs, sizeof(int16_t), KYBER_N, urandom); - for(int i=0;icoeffs[i] %= KYBER_Q; - } - fclose(urandom); -} - -int main(void) -{ - poly r0, r1; - - poly_setrandom(&r0); - - - for(int i=0;i -#include "../poly.h" -#include "../ntt.h" - -void poly_setrandom(poly *r) -{ - FILE *urandom = fopen("/dev/urandom", "r"); - fread(r->coeffs, sizeof(int16_t), KYBER_N, urandom); - for(int i=0;icoeffs[i] %= KYBER_Q; - } - fclose(urandom); -} - -int main(void) -{ - poly r0, r1; - - poly_setrandom(&r0); - - for(int i=0;i -#include "../poly.h" -#include "../ntt.h" - -void poly_setrandom(poly *r) -{ - FILE *urandom = fopen("/dev/urandom", "r"); - fread(r->coeffs, sizeof(int16_t), KYBER_N, urandom); - fclose(urandom); -} - -int main(void) -{ - poly r0, r1; - - poly_setrandom(&r0); - - for(int i=0;i -#include "../poly.h" - -void poly_setrandom(poly *r) -{ - FILE *urandom = fopen("/dev/urandom", "r"); - fread(r->coeffs, sizeof(int16_t), KYBER_N, urandom); - for(int i=0;icoeffs[i] %= KYBER_Q; - fclose(urandom); -} - -int main(void) -{ - poly a, b, r0, r1; - - poly_setrandom(&a); - poly_setrandom(&b); - - poly_sub(&r0, &a, &b); - - poly_sub_jazz(&r1, &a, &b); - - for(int i=0;i -#include "../poly.h" -#include "../ntt.h" - -void poly_setrandom(poly *r) -{ - FILE *urandom = fopen("/dev/urandom", "r"); - fread(r->coeffs, sizeof(int16_t), KYBER_N, urandom); - fclose(urandom); - poly_reduce(r); -} - -int main(void) -{ - unsigned char out0[KYBER_POLYBYTES]; - unsigned char out1[KYBER_POLYBYTES]; - poly a; - - poly_setrandom(&a); - - poly_tobytes(out0, &a); - poly_tobytes_jazz(out1, &a); - - for(int i=0;i -#include "../poly.h" -#include "../ntt.h" - -void poly_setrandom(poly *r) -{ - FILE *urandom = fopen("/dev/urandom", "r"); - fread(r->coeffs, sizeof(int16_t), KYBER_N, urandom); - fclose(urandom); - poly_reduce(r); -} - -int main(void) -{ - unsigned char out0[KYBER_INDCPA_MSGBYTES]; - unsigned char out1[KYBER_INDCPA_MSGBYTES]; - poly a; - - poly_setrandom(&a); - - poly_tomsg(out0, &a); - poly_tomsg_jazz(out1, &a); - - for(int i=0;i -#include "../polyvec.h" - -void polyvec_setrandom(polyvec *r) -{ - FILE *urandom = fopen("/dev/urandom", "r"); - for(int i=0;ivec[i].coeffs, sizeof(int16_t), KYBER_N, urandom); - for(int i=0;ivec[i].coeffs[j] %= KYBER_Q; - fclose(urandom); -} - -int main(void) -{ - polyvec a, b, r0; - - polyvec_setrandom(&a); - polyvec_setrandom(&b); - - polyvec_add(&r0, &a, &b); - polyvec_add2_jazz(&a, &b); - - for(int i=0;i -#include "../polyvec.h" -#include "../ntt.h" - -void polyvec_setrandom(polyvec *r) -{ - FILE *urandom = fopen("/dev/urandom", "r"); - for(int i=0;ivec[i].coeffs, sizeof(int16_t), KYBER_N, urandom); - - polyvec_reduce(r); - fclose(urandom); -} - -int main(void) -{ - unsigned char out0[KYBER_POLYVECCOMPRESSEDBYTES]; - unsigned char out1[KYBER_POLYVECCOMPRESSEDBYTES]; - polyvec a; - - polyvec_setrandom(&a); - - polyvec_compress(out0, &a); - polyvec_compress_jazz(out1, &a); - - for(int i=0;i -#include "../poly.h" -#include "../polyvec.h" - -void polyvec_setrandom(polyvec *r) -{ - FILE *urandom = fopen("/dev/urandom", "r"); - for(int i=0;ivec[i].coeffs, sizeof(int16_t), KYBER_N, urandom); - fclose(urandom); - polyvec_reduce(r); -} - -int main(void) -{ - polyvec r0, r1; - - polyvec_setrandom(&r0); - - for(int i = 0;i -#include "../polyvec.h" - -int main(void) -{ - unsigned char in[KYBER_POLYVECCOMPRESSEDBYTES]; - polyvec r0, r1; - - FILE *urandom = fopen("/dev/urandom", "r"); - fread(in, 1, KYBER_POLYVECCOMPRESSEDBYTES, urandom); - fclose(urandom); - - polyvec_decompress(&r0, in); - polyvec_decompress_jazz(&r1, in); - - for(int i=0;i -#include "../polyvec.h" - -int main(void) -{ - unsigned char in[KYBER_POLYVECBYTES]; - polyvec r0, r1; - - FILE *urandom = fopen("/dev/urandom", "r"); - fread(in, 1, KYBER_POLYVECBYTES, urandom); - fclose(urandom); - - polyvec_frombytes(&r0, in); - polyvec_frombytes_jazz(&r1, in); - - for(int i=0;i -#include "../ntt.h" -#include "../poly.h" -#include "../polyvec.h" - -void polyvec_setrandom(polyvec *r) -{ - FILE *urandom = fopen("/dev/urandom", "r"); - for(int i=0;ivec[i].coeffs, sizeof(int16_t), KYBER_N, urandom); - for(int i=0;ivec[i].coeffs[j] %= KYBER_Q; - fclose(urandom); -} - -int main(void) -{ - polyvec r0, r1; - - polyvec_setrandom(&r0); - - for(int i = 0;i -#include "../ntt.h" -#include "../poly.h" -#include "../polyvec.h" - -void polyvec_setrandom(polyvec *r) -{ - FILE *urandom = fopen("/dev/urandom", "r"); - for(int i=0;ivec[i].coeffs, sizeof(int16_t), KYBER_N, urandom); - for(int i=0;ivec[i].coeffs[j] %= 2*KYBER_Q; - fclose(urandom); -} - -int main(void) -{ - polyvec r0, r1; - - polyvec_setrandom(&r0); - - for(int i = 0;i -#include "../ntt.h" -#include "../polyvec.h" - -void polyvec_setrandom(polyvec *r) -{ - FILE *urandom = fopen("/dev/urandom", "r"); - for(int i=0;ivec[i].coeffs, sizeof(int16_t), KYBER_N, urandom); - for(int i=0;ivec[i].coeffs[j] %= KYBER_Q; - fclose(urandom); -} - -int main(void) -{ - polyvec a, b; - poly r0, r1; - - polyvec_setrandom(&a); - polyvec_setrandom(&b); - - polyvec_pointwise_acc(&r0, &a, &b); - polyvec_pointwise_acc_jazz(&r1, &a, &b); - - for(int j=0;j -#include "../poly.h" -#include "../polyvec.h" - -void polyvec_setrandom(polyvec *r) -{ - FILE *urandom = fopen("/dev/urandom", "r"); - for(int i=0;ivec[i].coeffs, sizeof(int16_t), KYBER_N, urandom); - fclose(urandom); -} - -int main(void) -{ - polyvec r0, r1; - - polyvec_setrandom(&r0); - - for(int i = 0;i -#include "../polyvec.h" -#include "../ntt.h" -#include "../reduce.h" - -void polyvec_setrandom(polyvec *r) -{ - FILE *urandom = fopen("/dev/urandom", "r"); - for(int i=0;ivec[i].coeffs, sizeof(int16_t), KYBER_N, urandom); - - polyvec_reduce(r); - - fclose(urandom); -} - -int main(void) -{ - unsigned char out0[KYBER_POLYVECBYTES]; - unsigned char out1[KYBER_POLYVECBYTES]; - polyvec a; - - polyvec_setrandom(&a); - - polyvec_tobytes(out0, &a); - polyvec_tobytes_jazz(out1, &a); - - for(int i=0;i reg u64 -{ - reg u256 f g h; - reg u64 cnd t64; - reg u8 t1 t2; - reg bool zf; - inline int i off; - - cnd = 0; - t64 = 1; - h = #set0_256(); - - for i=0 to KYBER_INDCPA_CIPHERTEXTBYTES/32 - { - f = ctpc.[u256 32*i]; - g = (u256)[ctp + 32*i]; - f = #VPXOR_256(f, g); - h = #VPOR_256(h, f); - } - - _, _, _, _, zf = #VPTEST_256(h, h); - - cnd = t64 if !zf; - - off = KYBER_INDCPA_CIPHERTEXTBYTES/32 * 32; - - for i=off to KYBER_INDCPA_CIPHERTEXTBYTES - { - t1 = ctpc.[i]; - t2 = (u8)[ctp + i]; - t1 ^= t2; - t64 = (64u)t1; - cnd |= t64; - } - - cnd = -cnd; - cnd >>= 63; - - return cnd; -} - -inline -fn __cmov(reg ptr u8[KYBER_SYMBYTES] dst, reg u64 src cnd) -> reg ptr u8[KYBER_SYMBYTES] -{ - reg u256 f g m; - stack u64 scnd; - reg u8 t1 t2 bcond; - inline int i off; - - cnd = -cnd; - scnd = cnd; - - m = #VPBROADCAST_4u64(scnd); - - for i=0 to KYBER_SYMBYTES/32 - { - f = dst.[u256 32*i]; - g = (u256)[src + 32*i]; - f = #VPBLENDVB_256(f, g, m); - dst.[u256 32*i] = f; - } - - off = KYBER_SYMBYTES/32 * 32; - - bcond = (8u)cnd; - for i=off to KYBER_SYMBYTES - { - t1 = dst.[i]; - t2 = (u8)[src + i]; - t2 = t2 ^ t1; - t2 = t2 & cnd; - t1 ^= t2; - dst.[u8 i] = t1; - } - - return dst; -} diff --git a/code/jasmin/ref/Makefile b/code/jasmin/ref/Makefile deleted file mode 100644 index 9900a733..00000000 --- a/code/jasmin/ref/Makefile +++ /dev/null @@ -1,121 +0,0 @@ -# -*- Makefile -*- - --include ../../Makefile.conf - -CC ?= /usr/bin/gcc -CFLAGS := -Wall -Wextra -g -O3 -fomit-frame-pointer -JFLAGS := -lea ${JADDFLAGS} -OS := $(shell uname -s) - -.SECONDARY: jpoly.s jpolyvec.s jfips203.s jindcpa.s jkem.s - -default: test speed - -test: test/test_poly_compress \ - test/test_poly_decompress \ - test/test_poly_tobytes \ - test/test_poly_frombytes \ - test/test_poly_tomsg \ - test/test_poly_frommsg \ - test/test_poly_add2 \ - test/test_poly_sub \ - test/test_poly_ntt \ - test/test_poly_invntt \ - test/test_poly_basemul \ - test/test_poly_frommont \ - test/test_poly_reduce \ - test/test_poly_csubq \ - test/test_poly_getnoise \ - test/test_polyvec_compress\ - test/test_polyvec_decompress\ - test/test_polyvec_tobytes \ - test/test_polyvec_frombytes \ - test/test_polyvec_add2 \ - test/test_polyvec_ntt \ - test/test_polyvec_invntt \ - test/test_polyvec_pointwise_acc \ - test/test_polyvec_reduce\ - test/test_polyvec_csubq \ - test/test_fips202 \ - test/test_indcpa \ - test/test_kem - -speed: test/speed_indcpa - -HEADERS = params.h poly.h fips202.h kem.h - - -JHEADERS = params.jinc \ - reduce.jinc \ - fips202.jinc \ - kem.jinc \ - verify.jinc - -POLYHEADERS = poly.jinc \ - zetas.jinc \ - -POLYVECHEADERS = polyvec.jinc \ - gen_matrix.jinc \ - -SOURCES = poly.c polyvec.c cbd.c fips202.c ntt.c reduce.c symmetric-fips202.c indcpa.c kem.c \ - -test/test_indcpa: test/test_indcpa.c $(HEADERS) $(SOURCES) jindcpa.o - $(CC) $(CFLAGS) -o $@ $(SOURCES) jindcpa.o $< - -test/test_kem: test/test_kem.c $(HEADERS) $(SOURCES) $(INCS) jkem.o - $(CC) $(CFLAGS) -o $@ $(SOURCES) ~/Desktop/Repos/jasmin/compiler/syscall/jasmin_syscall.o jkem.o $< - -test/speed_indcpa: test/speed_indcpa.c $(HEADERS) $(SOURCES) jindcpa.o - $(CC) $(CFLAGS) -o $@ $(SOURCES) jindcpa.o $< - -test/test_fips202: test/test_fips202.c $(HEADERS) $(SOURCES) jfips202.s - $(CC) $(CFLAGS) -o $@ $(SOURCES) jfips202.s $< - -test/test_poly_%: test/test_poly_%.c $(HEADERS) $(SOURCES) jpoly.s - $(CC) $(CFLAGS) -o $@ $(SOURCES) jpoly.s $< - -test/test_polyvec_%: test/test_polyvec_%.c $(HEADERS) $(SOURCES) jpolyvec.s - $(CC) $(CFLAGS) -o $@ $(SOURCES) jpolyvec.s $< - -%.s: %.jazz - $(JASMIN) -o $@ $(JFLAGS) $^ - - -.PHONY: clean - -clean: - -rm -f *.s - -rm -f jindcpa.o - -rm -f jkem.o - -rm -f test/test_poly_compress - -rm -f test/test_poly_decompress - -rm -f test/test_poly_tobytes - -rm -f test/test_poly_frombytes - -rm -f test/test_poly_tomsg - -rm -f test/test_poly_frommsg - -rm -f test/test_poly_add2 - -rm -f test/test_poly_sub - -rm -f test/test_poly_ntt - -rm -f test/test_poly_invntt - -rm -f test/test_poly_basemul - -rm -f test/test_poly_frommont - -rm -f test/test_poly_reduce - -rm -f test/test_poly_csubq - -rm -f test/test_poly_getnoise - -rm -f test/test_polyvec_compress - -rm -f test/test_polyvec_decompress - -rm -f test/test_polyvec_tobytes - -rm -f test/test_polyvec_frombytes - -rm -f test/test_polyvec_add2 - -rm -f test/test_polyvec_ntt - -rm -f test/test_polyvec_invntt - -rm -f test/test_polyvec_pointwise_acc - -rm -f test/test_polyvec_reduce - -rm -f test/test_polyvec_csubq - -rm -f test/test_fips202 - -rm -f test/test_indcpa - -rm -f test/test_kem - -rm -f test/speed_indcpa -ifeq ($(OS),Darwin) - -rm -rf test/*.dSYM -endif diff --git a/code/jasmin/ref/cbd.c b/code/jasmin/ref/cbd.c deleted file mode 100644 index a28dc7e4..00000000 --- a/code/jasmin/ref/cbd.c +++ /dev/null @@ -1,58 +0,0 @@ -#include -#include "params.h" -#include "cbd.h" - -/************************************************* -* Name: load32_littleendian -* -* Description: load bytes into a 32-bit integer -* in little-endian order -* -* Arguments: - const unsigned char *x: pointer to input byte array -* -* Returns 32-bit unsigned integer loaded from x -**************************************************/ -static uint32_t load32_littleendian(const unsigned char *x) -{ - uint32_t r; - r = (uint32_t)x[0]; - r |= (uint32_t)x[1] << 8; - r |= (uint32_t)x[2] << 16; - r |= (uint32_t)x[3] << 24; - return r; -} - -/************************************************* -* Name: cbd -* -* Description: Given an array of uniformly random bytes, compute -* polynomial with coefficients distributed according to -* a centered binomial distribution with parameter KYBER_ETA -* -* Arguments: - poly *r: pointer to output polynomial -* - const unsigned char *buf: pointer to input byte array -**************************************************/ -void cbd(poly *r, const unsigned char *buf) -{ -#if KYBER_ETA == 2 - uint32_t d,t; - int16_t a,b; - int i,j; - - for(i=0;i>1) & 0x55555555; - - for(j=0;j<8;j++) - { - a = (d >> 4*j) & 0x3; - b = (d >> (4*j+2)) & 0x3; - r->coeffs[8*i+j] = a - b; - } - } -#else -#error "poly_getnoise in poly.c only supports eta=2" -#endif -} diff --git a/code/jasmin/ref/cbd.h b/code/jasmin/ref/cbd.h deleted file mode 100644 index e3dbe040..00000000 --- a/code/jasmin/ref/cbd.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef CBD_H -#define CBD_H - -#include "poly.h" - -void cbd(poly *r, const unsigned char *buf); - -#endif diff --git a/code/jasmin/ref/extraction/Array1088.ec b/code/jasmin/ref/extraction/Array1088.ec deleted file mode 100644 index de2a1ea4..00000000 --- a/code/jasmin/ref/extraction/Array1088.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JArray. - -clone export PolyArray as Array1088 with op size <- 1088. diff --git a/code/jasmin/ref/extraction/Array128.ec b/code/jasmin/ref/extraction/Array128.ec deleted file mode 100644 index e5880272..00000000 --- a/code/jasmin/ref/extraction/Array128.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JArray. - -clone export PolyArray as Array128 with op size <- 128. diff --git a/code/jasmin/ref/extraction/Array168.ec b/code/jasmin/ref/extraction/Array168.ec deleted file mode 100644 index 6abfbe44..00000000 --- a/code/jasmin/ref/extraction/Array168.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JArray. - -clone export PolyArray as Array168 with op size <- 168. diff --git a/code/jasmin/ref/extraction/Array2304.ec b/code/jasmin/ref/extraction/Array2304.ec deleted file mode 100644 index f0038311..00000000 --- a/code/jasmin/ref/extraction/Array2304.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JArray. - -clone export PolyArray as Array2304 with op size <- 2304. diff --git a/code/jasmin/ref/extraction/Array24.ec b/code/jasmin/ref/extraction/Array24.ec deleted file mode 100644 index 8982b77c..00000000 --- a/code/jasmin/ref/extraction/Array24.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JArray. - -clone export PolyArray as Array24 with op size <- 24. diff --git a/code/jasmin/ref/extraction/Array25.ec b/code/jasmin/ref/extraction/Array25.ec deleted file mode 100644 index 30bcb172..00000000 --- a/code/jasmin/ref/extraction/Array25.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JArray. - -clone export PolyArray as Array25 with op size <- 25. diff --git a/code/jasmin/ref/extraction/Array256.ec b/code/jasmin/ref/extraction/Array256.ec deleted file mode 100644 index 6f03a141..00000000 --- a/code/jasmin/ref/extraction/Array256.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JArray. - -clone export PolyArray as Array256 with op size <- 256. diff --git a/code/jasmin/ref/extraction/Array32.ec b/code/jasmin/ref/extraction/Array32.ec deleted file mode 100644 index c72b94f2..00000000 --- a/code/jasmin/ref/extraction/Array32.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JArray. - -clone export PolyArray as Array32 with op size <- 32. diff --git a/code/jasmin/ref/extraction/Array33.ec b/code/jasmin/ref/extraction/Array33.ec deleted file mode 100644 index c60f0144..00000000 --- a/code/jasmin/ref/extraction/Array33.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JArray. - -clone export PolyArray as Array33 with op size <- 33. diff --git a/code/jasmin/ref/extraction/Array34.ec b/code/jasmin/ref/extraction/Array34.ec deleted file mode 100644 index d6bb77b1..00000000 --- a/code/jasmin/ref/extraction/Array34.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JArray. - -clone export PolyArray as Array34 with op size <- 34. diff --git a/code/jasmin/ref/extraction/Array4.ec b/code/jasmin/ref/extraction/Array4.ec deleted file mode 100644 index bc0e12ed..00000000 --- a/code/jasmin/ref/extraction/Array4.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JArray. - -clone export PolyArray as Array4 with op size <- 4. diff --git a/code/jasmin/ref/extraction/Array5.ec b/code/jasmin/ref/extraction/Array5.ec deleted file mode 100644 index 8dc7b36e..00000000 --- a/code/jasmin/ref/extraction/Array5.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JArray. - -clone export PolyArray as Array5 with op size <- 5. diff --git a/code/jasmin/ref/extraction/Array64.ec b/code/jasmin/ref/extraction/Array64.ec deleted file mode 100644 index 3ccc4576..00000000 --- a/code/jasmin/ref/extraction/Array64.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JArray. - -clone export PolyArray as Array64 with op size <- 64. diff --git a/code/jasmin/ref/extraction/Array768.ec b/code/jasmin/ref/extraction/Array768.ec deleted file mode 100644 index 241538a0..00000000 --- a/code/jasmin/ref/extraction/Array768.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JArray. - -clone export PolyArray as Array768 with op size <- 768. diff --git a/code/jasmin/ref/extraction/Array960.ec b/code/jasmin/ref/extraction/Array960.ec deleted file mode 100644 index bb0f324a..00000000 --- a/code/jasmin/ref/extraction/Array960.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JArray. - -clone export PolyArray as Array960 with op size <- 960. diff --git a/code/jasmin/ref/extraction/Makefile b/code/jasmin/ref/extraction/Makefile deleted file mode 100644 index 73b2f93a..00000000 --- a/code/jasmin/ref/extraction/Makefile +++ /dev/null @@ -1,19 +0,0 @@ -# -*- Makefile -*- - -# -------------------------------------------------------------------- --include ../../../Makefile.conf - -# -------------------------------------------------------------------- -.PHONY: all ec clean - -# -------------------------------------------------------------------- -all: ec - -ec: - $(JASMINC) ../jkem.jazz -oec jkem.ec \ - -ec jade_kem_kyber_kyber768_amd64_ref_keypair \ - -ec jade_kem_kyber_kyber768_amd64_ref_enc \ - -ec jade_kem_kyber_kyber768_amd64_ref_dec - -clean: - rm -f *.ec diff --git a/code/jasmin/ref/extraction/WArray1088.ec b/code/jasmin/ref/extraction/WArray1088.ec deleted file mode 100644 index 811cd399..00000000 --- a/code/jasmin/ref/extraction/WArray1088.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JWord_array. - -clone export WArray as WArray1088 with op size <- 1088. diff --git a/code/jasmin/ref/extraction/WArray128.ec b/code/jasmin/ref/extraction/WArray128.ec deleted file mode 100644 index 3c9d6893..00000000 --- a/code/jasmin/ref/extraction/WArray128.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JWord_array. - -clone export WArray as WArray128 with op size <- 128. diff --git a/code/jasmin/ref/extraction/WArray1536.ec b/code/jasmin/ref/extraction/WArray1536.ec deleted file mode 100644 index 83524e52..00000000 --- a/code/jasmin/ref/extraction/WArray1536.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JWord_array. - -clone export WArray as WArray1536 with op size <- 1536. diff --git a/code/jasmin/ref/extraction/WArray168.ec b/code/jasmin/ref/extraction/WArray168.ec deleted file mode 100644 index 7292dff0..00000000 --- a/code/jasmin/ref/extraction/WArray168.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JWord_array. - -clone export WArray as WArray168 with op size <- 168. diff --git a/code/jasmin/ref/extraction/WArray192.ec b/code/jasmin/ref/extraction/WArray192.ec deleted file mode 100644 index c8564c54..00000000 --- a/code/jasmin/ref/extraction/WArray192.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JWord_array. - -clone export WArray as WArray192 with op size <- 192. diff --git a/code/jasmin/ref/extraction/WArray20.ec b/code/jasmin/ref/extraction/WArray20.ec deleted file mode 100644 index ae36a56f..00000000 --- a/code/jasmin/ref/extraction/WArray20.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JWord_array. - -clone export WArray as WArray20 with op size <- 20. diff --git a/code/jasmin/ref/extraction/WArray200.ec b/code/jasmin/ref/extraction/WArray200.ec deleted file mode 100644 index 99b887c8..00000000 --- a/code/jasmin/ref/extraction/WArray200.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JWord_array. - -clone export WArray as WArray200 with op size <- 200. diff --git a/code/jasmin/ref/extraction/WArray256.ec b/code/jasmin/ref/extraction/WArray256.ec deleted file mode 100644 index b07b1c22..00000000 --- a/code/jasmin/ref/extraction/WArray256.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JWord_array. - -clone export WArray as WArray256 with op size <- 256. diff --git a/code/jasmin/ref/extraction/WArray32.ec b/code/jasmin/ref/extraction/WArray32.ec deleted file mode 100644 index b828f8d3..00000000 --- a/code/jasmin/ref/extraction/WArray32.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JWord_array. - -clone export WArray as WArray32 with op size <- 32. diff --git a/code/jasmin/ref/extraction/WArray33.ec b/code/jasmin/ref/extraction/WArray33.ec deleted file mode 100644 index 1e8a9d93..00000000 --- a/code/jasmin/ref/extraction/WArray33.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JWord_array. - -clone export WArray as WArray33 with op size <- 33. diff --git a/code/jasmin/ref/extraction/WArray34.ec b/code/jasmin/ref/extraction/WArray34.ec deleted file mode 100644 index 1f331a9b..00000000 --- a/code/jasmin/ref/extraction/WArray34.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JWord_array. - -clone export WArray as WArray34 with op size <- 34. diff --git a/code/jasmin/ref/extraction/WArray40.ec b/code/jasmin/ref/extraction/WArray40.ec deleted file mode 100644 index 003b6e23..00000000 --- a/code/jasmin/ref/extraction/WArray40.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JWord_array. - -clone export WArray as WArray40 with op size <- 40. diff --git a/code/jasmin/ref/extraction/WArray4608.ec b/code/jasmin/ref/extraction/WArray4608.ec deleted file mode 100644 index e32c47df..00000000 --- a/code/jasmin/ref/extraction/WArray4608.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JWord_array. - -clone export WArray as WArray4608 with op size <- 4608. diff --git a/code/jasmin/ref/extraction/WArray512.ec b/code/jasmin/ref/extraction/WArray512.ec deleted file mode 100644 index a690df87..00000000 --- a/code/jasmin/ref/extraction/WArray512.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JWord_array. - -clone export WArray as WArray512 with op size <- 512. diff --git a/code/jasmin/ref/extraction/WArray64.ec b/code/jasmin/ref/extraction/WArray64.ec deleted file mode 100644 index 6f4aeb6b..00000000 --- a/code/jasmin/ref/extraction/WArray64.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JWord_array. - -clone export WArray as WArray64 with op size <- 64. diff --git a/code/jasmin/ref/extraction/WArray960.ec b/code/jasmin/ref/extraction/WArray960.ec deleted file mode 100644 index c2f56fe9..00000000 --- a/code/jasmin/ref/extraction/WArray960.ec +++ /dev/null @@ -1,3 +0,0 @@ -from Jasmin require import JWord_array. - -clone export WArray as WArray960 with op size <- 960. diff --git a/code/jasmin/ref/extraction/jkem.ec b/code/jasmin/ref/extraction/jkem.ec deleted file mode 100644 index e51765ad..00000000 --- a/code/jasmin/ref/extraction/jkem.ec +++ /dev/null @@ -1,2460 +0,0 @@ -require import AllCore IntDiv CoreMap List Distr. -from Jasmin require import JModel_x86. -import SLH64. - - -require import Array4 Array5 Array24 Array25 Array32 Array33 Array34 Array64 - Array128 Array168 Array256 Array768 Array960 Array1088 - Array2304. -require import WArray20 WArray32 WArray33 WArray34 WArray40 WArray64 - WArray128 WArray168 WArray192 WArray200 WArray256 WArray512 - WArray960 WArray1088 WArray1536 WArray4608. - -abbrev jzetas_inv = Array128.of_list witness [W16.of_int 1701; -W16.of_int 1807; W16.of_int 1460; W16.of_int 2371; W16.of_int 2338; -W16.of_int 2333; W16.of_int 308; W16.of_int 108; W16.of_int 2851; -W16.of_int 870; W16.of_int 854; W16.of_int 1510; W16.of_int 2535; -W16.of_int 1278; W16.of_int 1530; W16.of_int 1185; W16.of_int 1659; -W16.of_int 1187; W16.of_int 3109; W16.of_int 874; W16.of_int 1335; -W16.of_int 2111; W16.of_int 136; W16.of_int 1215; W16.of_int 2945; -W16.of_int 1465; W16.of_int 1285; W16.of_int 2007; W16.of_int 2719; -W16.of_int 2726; W16.of_int 2232; W16.of_int 2512; W16.of_int 75; -W16.of_int 156; W16.of_int 3000; W16.of_int 2911; W16.of_int 2980; -W16.of_int 872; W16.of_int 2685; W16.of_int 1590; W16.of_int 2210; -W16.of_int 602; W16.of_int 1846; W16.of_int 777; W16.of_int 147; -W16.of_int 2170; W16.of_int 2551; W16.of_int 246; W16.of_int 1676; -W16.of_int 1755; W16.of_int 460; W16.of_int 291; W16.of_int 235; -W16.of_int 3152; W16.of_int 2742; W16.of_int 2907; W16.of_int 3224; -W16.of_int 1779; W16.of_int 2458; W16.of_int 1251; W16.of_int 2486; -W16.of_int 2774; W16.of_int 2899; W16.of_int 1103; W16.of_int 1275; -W16.of_int 2652; W16.of_int 1065; W16.of_int 2881; W16.of_int 725; -W16.of_int 1508; W16.of_int 2368; W16.of_int 398; W16.of_int 951; -W16.of_int 247; W16.of_int 1421; W16.of_int 3222; W16.of_int 2499; -W16.of_int 271; W16.of_int 90; W16.of_int 853; W16.of_int 1860; -W16.of_int 3203; W16.of_int 1162; W16.of_int 1618; W16.of_int 666; -W16.of_int 320; W16.of_int 8; W16.of_int 2813; W16.of_int 1544; -W16.of_int 282; W16.of_int 1838; W16.of_int 1293; W16.of_int 2314; -W16.of_int 552; W16.of_int 2677; W16.of_int 2106; W16.of_int 1571; -W16.of_int 205; W16.of_int 2918; W16.of_int 1542; W16.of_int 2721; -W16.of_int 2597; W16.of_int 2312; W16.of_int 681; W16.of_int 130; -W16.of_int 1602; W16.of_int 1871; W16.of_int 829; W16.of_int 2946; -W16.of_int 3065; W16.of_int 1325; W16.of_int 2756; W16.of_int 1861; -W16.of_int 1474; W16.of_int 1202; W16.of_int 2367; W16.of_int 3147; -W16.of_int 1752; W16.of_int 2707; W16.of_int 171; W16.of_int 3127; -W16.of_int 3042; W16.of_int 1907; W16.of_int 1836; W16.of_int 1517; -W16.of_int 359; W16.of_int 758; W16.of_int 1441]. - - -abbrev jzetas = Array128.of_list witness [W16.of_int 2285; W16.of_int 2571; -W16.of_int 2970; W16.of_int 1812; W16.of_int 1493; W16.of_int 1422; -W16.of_int 287; W16.of_int 202; W16.of_int 3158; W16.of_int 622; -W16.of_int 1577; W16.of_int 182; W16.of_int 962; W16.of_int 2127; -W16.of_int 1855; W16.of_int 1468; W16.of_int 573; W16.of_int 2004; -W16.of_int 264; W16.of_int 383; W16.of_int 2500; W16.of_int 1458; -W16.of_int 1727; W16.of_int 3199; W16.of_int 2648; W16.of_int 1017; -W16.of_int 732; W16.of_int 608; W16.of_int 1787; W16.of_int 411; -W16.of_int 3124; W16.of_int 1758; W16.of_int 1223; W16.of_int 652; -W16.of_int 2777; W16.of_int 1015; W16.of_int 2036; W16.of_int 1491; -W16.of_int 3047; W16.of_int 1785; W16.of_int 516; W16.of_int 3321; -W16.of_int 3009; W16.of_int 2663; W16.of_int 1711; W16.of_int 2167; -W16.of_int 126; W16.of_int 1469; W16.of_int 2476; W16.of_int 3239; -W16.of_int 3058; W16.of_int 830; W16.of_int 107; W16.of_int 1908; -W16.of_int 3082; W16.of_int 2378; W16.of_int 2931; W16.of_int 961; -W16.of_int 1821; W16.of_int 2604; W16.of_int 448; W16.of_int 2264; -W16.of_int 677; W16.of_int 2054; W16.of_int 2226; W16.of_int 430; -W16.of_int 555; W16.of_int 843; W16.of_int 2078; W16.of_int 871; -W16.of_int 1550; W16.of_int 105; W16.of_int 422; W16.of_int 587; -W16.of_int 177; W16.of_int 3094; W16.of_int 3038; W16.of_int 2869; -W16.of_int 1574; W16.of_int 1653; W16.of_int 3083; W16.of_int 778; -W16.of_int 1159; W16.of_int 3182; W16.of_int 2552; W16.of_int 1483; -W16.of_int 2727; W16.of_int 1119; W16.of_int 1739; W16.of_int 644; -W16.of_int 2457; W16.of_int 349; W16.of_int 418; W16.of_int 329; -W16.of_int 3173; W16.of_int 3254; W16.of_int 817; W16.of_int 1097; -W16.of_int 603; W16.of_int 610; W16.of_int 1322; W16.of_int 2044; -W16.of_int 1864; W16.of_int 384; W16.of_int 2114; W16.of_int 3193; -W16.of_int 1218; W16.of_int 1994; W16.of_int 2455; W16.of_int 220; -W16.of_int 2142; W16.of_int 1670; W16.of_int 2144; W16.of_int 1799; -W16.of_int 2051; W16.of_int 794; W16.of_int 1819; W16.of_int 2475; -W16.of_int 2459; W16.of_int 478; W16.of_int 3221; W16.of_int 3021; -W16.of_int 996; W16.of_int 991; W16.of_int 958; W16.of_int 1869; -W16.of_int 1522; W16.of_int 1628]. - - -abbrev roundconstants = Array24.of_list witness [W64.of_int 1; -W64.of_int 32898; W64.of_int (-9223372036854742902); -W64.of_int (-9223372034707259392); W64.of_int 32907; W64.of_int 2147483649; -W64.of_int (-9223372034707259263); W64.of_int (-9223372036854743031); -W64.of_int 138; W64.of_int 136; W64.of_int 2147516425; W64.of_int 2147483658; -W64.of_int 2147516555; W64.of_int (-9223372036854775669); -W64.of_int (-9223372036854742903); W64.of_int (-9223372036854743037); -W64.of_int (-9223372036854743038); W64.of_int (-9223372036854775680); -W64.of_int 32778; W64.of_int (-9223372034707292150); -W64.of_int (-9223372034707259263); W64.of_int (-9223372036854742912); -W64.of_int 2147483649; W64.of_int (-9223372034707259384)]. - - -module type Syscall_t = { - proc randombytes_32(_:W8.t Array32.t) : W8.t Array32.t - proc randombytes_64(_:W8.t Array64.t) : W8.t Array64.t -}. - -module Syscall : Syscall_t = { - proc randombytes_32(a:W8.t Array32.t) : W8.t Array32.t = { - a <$ dmap WArray32.darray - (fun a => Array32.init (fun i => WArray32.get8 a i)); - return a; - } - - proc randombytes_64(a:W8.t Array64.t) : W8.t Array64.t = { - a <$ dmap WArray64.darray - (fun a => Array64.init (fun i => WArray64.get8 a i)); - return a; - } -}. - -module M(SC:Syscall_t) = { - proc __fqmul (a:W16.t, b:W16.t) : W16.t = { - - var r:W16.t; - var ad:W32.t; - var bd:W32.t; - var c:W32.t; - var u:W32.t; - var t:W32.t; - - ad <- (sigextu32 a); - bd <- (sigextu32 b); - c <- (ad * bd); - u <- (c * (W32.of_int (62209 `<<` 16))); - u <- (u `|>>` (W8.of_int 16)); - t <- (u * (W32.of_int (- 3329))); - t <- (t + c); - t <- (t `|>>` (W8.of_int 16)); - r <- (truncateu16 t); - return (r); - } - - proc __barrett_reduce (a:W16.t) : W16.t = { - - var r:W16.t; - var t:W32.t; - - t <- (sigextu32 a); - t <- (t * (W32.of_int 20159)); - t <- (t `|>>` (W8.of_int 26)); - t <- (t * (W32.of_int 3329)); - r <- (truncateu16 t); - r <- a; - r <- (r - (truncateu16 t)); - return (r); - } - - proc __index (x:int, y:int) : int = { - - var r:int; - - r <- ((x %% 5) + (5 * (y %% 5))); - return (r); - } - - proc __theta (a:W64.t Array25.t) : W64.t Array25.t = { - var aux_1: bool; - var aux_0: bool; - var aux: int; - var aux_2: W64.t; - - var x:int; - var c:W64.t Array5.t; - var y:int; - var d:W64.t Array5.t; - var _0:bool; - var _1:bool; - c <- witness; - d <- witness; - x <- 0; - while (x < 5) { - c.[x] <- (W64.of_int 0); - y <- 0; - while (y < 5) { - c.[x] <- (c.[x] `^` a.[(x + (5 * y))]); - y <- y + 1; - } - x <- x + 1; - } - x <- 0; - while (x < 5) { - d.[x] <- c.[((x + 1) %% 5)]; - (aux_1, aux_0, aux_2) <- ROL_64 d.[x] (W8.of_int 1); - _0 <- aux_1; - _1 <- aux_0; - d.[x] <- aux_2; - d.[x] <- (d.[x] `^` c.[((x + 4) %% 5)]); - x <- x + 1; - } - x <- 0; - while (x < 5) { - y <- 0; - while (y < 5) { - a.[(x + (5 * y))] <- (a.[(x + (5 * y))] `^` d.[x]); - y <- y + 1; - } - x <- x + 1; - } - return (a); - } - - proc __keccakRhoOffsets (i:int) : int = { - var aux: int; - - var r:int; - var x:int; - var y:int; - var t:int; - var z:int; - - r <- 0; - x <- 1; - y <- 0; - t <- 0; - while (t < 24) { - if ((i = (x + (5 * y)))) { - r <- ((((t + 1) * (t + 2)) %/ 2) %% 64); - } else { - - } - z <- (((2 * x) + (3 * y)) %% 5); - x <- y; - y <- z; - t <- t + 1; - } - return (r); - } - - proc __rho (a:W64.t Array25.t) : W64.t Array25.t = { - var aux_1: bool; - var aux_0: bool; - var aux: int; - var aux_2: W64.t; - - var x:int; - var y:int; - var i:int; - var z:int; - var _0:bool; - var _1:bool; - - x <- 0; - while (x < 5) { - y <- 0; - while (y < 5) { - i <@ __index (x, y); - z <@ __keccakRhoOffsets (i); - (aux_1, aux_0, aux_2) <- ROL_64 a.[i] (W8.of_int z); - _0 <- aux_1; - _1 <- aux_0; - a.[i] <- aux_2; - y <- y + 1; - } - x <- x + 1; - } - return (a); - } - - proc __pi (a:W64.t Array25.t) : W64.t Array25.t = { - var aux: int; - - var i:int; - var t:W64.t; - var b:W64.t Array25.t; - var y:int; - var x:int; - b <- witness; - i <- 0; - while (i < 25) { - t <- a.[i]; - b.[i] <- t; - i <- i + 1; - } - x <- 0; - while (x < 5) { - y <- 0; - while (y < 5) { - t <- b.[(x + (5 * y))]; - i <@ __index (y, ((2 * x) + (3 * y))); - a.[i] <- t; - y <- y + 1; - } - x <- x + 1; - } - return (a); - } - - proc __chi (a:W64.t Array25.t) : W64.t Array25.t = { - var aux: int; - - var x:int; - var y:int; - var i:int; - var c:W64.t Array5.t; - c <- witness; - y <- 0; - while (y < 5) { - x <- 0; - while (x < 5) { - i <@ __index ((x + 1), y); - c.[x] <- a.[i]; - c.[x] <- (invw c.[x]); - i <@ __index ((x + 2), y); - c.[x] <- (c.[x] `&` a.[i]); - i <@ __index (x, y); - c.[x] <- (c.[x] `^` a.[i]); - x <- x + 1; - } - x <- 0; - while (x < 5) { - a.[(x + (5 * y))] <- c.[x]; - x <- x + 1; - } - y <- y + 1; - } - return (a); - } - - proc __iota (a:W64.t Array25.t, c:W64.t) : W64.t Array25.t = { - - - - a.[0] <- (a.[0] `^` c); - return (a); - } - - proc __keccakf1600_ref (state:W64.t Array25.t) : W64.t Array25.t = { - - var constptr:W64.t Array24.t; - var rctr:W64.t; - constptr <- witness; - constptr <- roundconstants; - rctr <- (W64.of_int 0); - - while ((rctr \ult (W64.of_int 192))) { - state <@ __theta (state); - state <@ __rho (state); - state <@ __pi (state); - state <@ __chi (state); - constptr <- roundconstants; - state <@ __iota (state, - (get64_direct (WArray192.init64 (fun i => (constptr).[i])) - (W64.to_uint rctr))); - rctr <- (rctr + (W64.of_int 8)); - } - return (state); - } - - proc __st0 (state:W64.t Array25.t) : W64.t Array25.t = { - var aux: int; - - var i:int; - - i <- 0; - while (i < 25) { - state.[i] <- (W64.of_int 0); - i <- i + 1; - } - return (state); - } - - proc __add_full_block (state:W64.t Array25.t, in_0:W64.t, inlen:W64.t, - r8:W64.t) : W64.t Array25.t * W64.t * W64.t = { - - var r64:W64.t; - var i:W64.t; - var t:W64.t; - - r64 <- r8; - r64 <- (r64 `>>` (W8.of_int 3)); - i <- (W64.of_int 0); - - while ((i \ult r64)) { - t <- (loadW64 Glob.mem (W64.to_uint (in_0 + ((W64.of_int 8) * i)))); - state.[(W64.to_uint i)] <- (state.[(W64.to_uint i)] `^` t); - i <- (i + (W64.of_int 1)); - } - in_0 <- (in_0 + r8); - inlen <- (inlen - r8); - return (state, in_0, inlen); - } - - proc __add_final_block (state:W64.t Array25.t, in_0:W64.t, inlen:W64.t, - trail_byte:W8.t, r8:W64.t) : W64.t Array25.t = { - - var inlen8:W64.t; - var i:W64.t; - var t:W64.t; - var c:W8.t; - - inlen8 <- inlen; - inlen8 <- (inlen8 `>>` (W8.of_int 3)); - i <- (W64.of_int 0); - - while ((i \ult inlen8)) { - t <- (loadW64 Glob.mem (W64.to_uint (in_0 + ((W64.of_int 8) * i)))); - state.[(W64.to_uint i)] <- (state.[(W64.to_uint i)] `^` t); - i <- (i + (W64.of_int 1)); - } - i <- (i `<<` (W8.of_int 3)); - - while ((i \ult inlen)) { - c <- (loadW8 Glob.mem (W64.to_uint (in_0 + i))); - state <- - Array25.init - (WArray200.get64 (WArray200.set8 (WArray200.init64 (fun i_0 => (state).[i_0])) (W64.to_uint i) (( - (get8 (WArray200.init64 (fun i_0 => (state).[i_0])) (W64.to_uint i)) `^` c)))); - i <- (i + (W64.of_int 1)); - } - state <- - Array25.init - (WArray200.get64 (WArray200.set8 (WArray200.init64 (fun i_0 => (state).[i_0])) (W64.to_uint i) (( - (get8 (WArray200.init64 (fun i_0 => (state).[i_0])) (W64.to_uint i)) `^` trail_byte)))); - i <- r8; - i <- (i - (W64.of_int 1)); - state <- - Array25.init - (WArray200.get64 (WArray200.set8 (WArray200.init64 (fun i_0 => (state).[i_0])) (W64.to_uint i) (( - (get8 (WArray200.init64 (fun i_0 => (state).[i_0])) (W64.to_uint i)) `^` (W8.of_int 128))))); - return (state); - } - - proc _shake256_128_33 (out:W8.t Array128.t, in_0:W8.t Array33.t) : - W8.t Array128.t = { - var aux: int; - - var sout:W8.t Array128.t; - var state:W64.t Array25.t; - var i:int; - var c:W8.t; - sout <- witness; - state <- witness; - sout <- out; - state <@ __st0 (state); - i <- 0; - while (i < 33) { - c <- in_0.[i]; - state <- - Array25.init - (WArray200.get64 (WArray200.set8 (WArray200.init64 (fun i_0 => (state).[i_0])) i (( - (get8 (WArray200.init64 (fun i_0 => (state).[i_0])) i) `^` c)))); - i <- i + 1; - } - state <- - Array25.init - (WArray200.get64 (WArray200.set8 (WArray200.init64 (fun i_0 => (state).[i_0])) 33 (( - (get8 (WArray200.init64 (fun i_0 => (state).[i_0])) 33) `^` (W8.of_int 31))))); - state <- - Array25.init - (WArray200.get64 (WArray200.set8 (WArray200.init64 (fun i_0 => (state).[i_0])) (136 - 1) (( - (get8 (WArray200.init64 (fun i_0 => (state).[i_0])) (136 - 1)) `^` (W8.of_int 128))))); - state <@ __keccakf1600_ref (state); - out <- sout; - i <- 0; - while (i < 128) { - c <- (get8 (WArray200.init64 (fun i_0 => (state).[i_0])) i); - out.[i] <- c; - i <- i + 1; - } - return (out); - } - - proc _sha3512_32 (out:W8.t Array64.t, in_0:W8.t Array32.t) : W8.t Array64.t = { - var aux: int; - - var state:W64.t Array25.t; - var i:int; - var c:W8.t; - state <- witness; - state <@ __st0 (state); - i <- 0; - while (i < 32) { - c <- in_0.[i]; - state <- - Array25.init - (WArray200.get64 (WArray200.set8 (WArray200.init64 (fun i_0 => (state).[i_0])) i (( - (get8 (WArray200.init64 (fun i_0 => (state).[i_0])) i) `^` c)))); - i <- i + 1; - } - state <- - Array25.init - (WArray200.get64 (WArray200.set8 (WArray200.init64 (fun i_0 => (state).[i_0])) 32 (( - (get8 (WArray200.init64 (fun i_0 => (state).[i_0])) 32) `^` (W8.of_int 6))))); - state <- - Array25.init - (WArray200.get64 (WArray200.set8 (WArray200.init64 (fun i_0 => (state).[i_0])) (72 - 1) (( - (get8 (WArray200.init64 (fun i_0 => (state).[i_0])) (72 - 1)) `^` (W8.of_int 128))))); - state <@ __keccakf1600_ref (state); - i <- 0; - while (i < 64) { - c <- (get8 (WArray200.init64 (fun i_0 => (state).[i_0])) i); - out.[i] <- c; - i <- i + 1; - } - return (out); - } - - proc _shake128_absorb34 (state:W64.t Array25.t, in_0:W8.t Array34.t) : - W64.t Array25.t = { - var aux: int; - - var i:int; - var c:W8.t; - - state <@ __st0 (state); - i <- 0; - while (i < 34) { - c <- in_0.[i]; - state <- - Array25.init - (WArray200.get64 (WArray200.set8 (WArray200.init64 (fun i_0 => (state).[i_0])) i (( - (get8 (WArray200.init64 (fun i_0 => (state).[i_0])) i) `^` c)))); - i <- i + 1; - } - state <- - Array25.init - (WArray200.get64 (WArray200.set8 (WArray200.init64 (fun i_0 => (state).[i_0])) 34 (( - (get8 (WArray200.init64 (fun i_0 => (state).[i_0])) 34) `^` (W8.of_int 31))))); - state <- - Array25.init - (WArray200.get64 (WArray200.set8 (WArray200.init64 (fun i_0 => (state).[i_0])) (168 - 1) (( - (get8 (WArray200.init64 (fun i_0 => (state).[i_0])) (168 - 1)) `^` (W8.of_int 128))))); - return (state); - } - - proc _shake128_squeezeblock (state:W64.t Array25.t, out:W8.t Array168.t) : - W64.t Array25.t * W8.t Array168.t = { - var aux: int; - - var i:int; - var c:W8.t; - - state <@ __keccakf1600_ref (state); - i <- 0; - while (i < 168) { - c <- (get8 (WArray200.init64 (fun i_0 => (state).[i_0])) i); - out.[i] <- c; - i <- i + 1; - } - return (state, out); - } - - proc _isha3_256 (out:W8.t Array32.t, in_0:W64.t, inlen:W64.t) : W8.t Array32.t = { - var aux: int; - - var s_out:W8.t Array32.t; - var state:W64.t Array25.t; - var r8:W64.t; - var ilen:W64.t; - var s_in:W64.t; - var s_ilen:W64.t; - var s_r8:W64.t; - var t8:W8.t; - var i:int; - var t64:W64.t; - s_out <- witness; - state <- witness; - s_out <- out; - state <@ __st0 (state); - r8 <- (W64.of_int 136); - ilen <- inlen; - - while ((r8 \ule ilen)) { - (state, in_0, ilen) <@ __add_full_block (state, in_0, ilen, r8); - s_in <- in_0; - s_ilen <- ilen; - s_r8 <- r8; - state <@ __keccakf1600_ref (state); - in_0 <- s_in; - ilen <- s_ilen; - r8 <- s_r8; - } - t8 <- (W8.of_int 6); - state <@ __add_final_block (state, in_0, ilen, t8, r8); - state <@ __keccakf1600_ref (state); - out <- s_out; - i <- 0; - while (i < 4) { - t64 <- state.[i]; - out <- - Array32.init - (WArray32.get8 (WArray32.set64 (WArray32.init8 (fun i_0 => (out).[i_0])) i (t64))); - i <- i + 1; - } - return (out); - } - - proc _isha3_256_32 (out:W8.t Array32.t, in_0:W8.t Array32.t) : W8.t Array32.t = { - var aux: int; - - var s_out:W8.t Array32.t; - var state:W64.t Array25.t; - var i:int; - var t64:W64.t; - s_out <- witness; - state <- witness; - s_out <- out; - state <@ __st0 (state); - aux <- (32 %/ 8); - i <- 0; - while (i < aux) { - t64 <- (get64 (WArray32.init8 (fun i_0 => (in_0).[i_0])) i); - state.[i] <- t64; - i <- i + 1; - } - state <- - Array25.init - (WArray200.get64 (WArray200.set8 (WArray200.init64 (fun i_0 => (state).[i_0])) 32 (( - (get8 (WArray200.init64 (fun i_0 => (state).[i_0])) 32) `^` (W8.of_int 6))))); - state <- - Array25.init - (WArray200.get64 (WArray200.set8 (WArray200.init64 (fun i_0 => (state).[i_0])) (136 - 1) ((W8.of_int 128)))); - state <@ __keccakf1600_ref (state); - out <- s_out; - i <- 0; - while (i < 4) { - t64 <- state.[i]; - out <- - Array32.init - (WArray32.get8 (WArray32.set64 (WArray32.init8 (fun i_0 => (out).[i_0])) i (t64))); - i <- i + 1; - } - return (out); - } - - proc _sha3_512_64 (out:W8.t Array64.t, in_0:W8.t Array64.t) : W8.t Array64.t = { - var aux: int; - - var state:W64.t Array25.t; - var i:int; - var t64:W64.t; - var out_s:W8.t Array64.t; - out_s <- witness; - state <- witness; - state <@ __st0 (state); - i <- 0; - while (i < 8) { - t64 <- (get64 (WArray64.init8 (fun i_0 => (in_0).[i_0])) i); - state.[i] <- (state.[i] `^` t64); - i <- i + 1; - } - state <- - Array25.init - (WArray200.get64 (WArray200.set8 (WArray200.init64 (fun i_0 => (state).[i_0])) 64 (( - (get8 (WArray200.init64 (fun i_0 => (state).[i_0])) 64) `^` (W8.of_int 6))))); - state <- - Array25.init - (WArray200.get64 (WArray200.set8 (WArray200.init64 (fun i_0 => (state).[i_0])) (72 - 1) (( - (get8 (WArray200.init64 (fun i_0 => (state).[i_0])) (72 - 1)) `^` (W8.of_int 128))))); - out_s <- out; - state <@ __keccakf1600_ref (state); - out <- out_s; - i <- 0; - while (i < 8) { - t64 <- state.[i]; - out <- - Array64.init - (WArray64.get8 (WArray64.set64 (WArray64.init8 (fun i_0 => (out).[i_0])) i (t64))); - i <- i + 1; - } - return (out); - } - - proc _shake256_64 (out:W64.t, outlen:W64.t, in_0:W8.t Array64.t) : unit = { - var aux: int; - - var s_out:W64.t; - var s_outlen:W64.t; - var state:W64.t Array25.t; - var i:int; - var t64:W64.t; - var j:W64.t; - var c:W8.t; - state <- witness; - s_out <- out; - s_outlen <- outlen; - state <@ __st0 (state); - i <- 0; - while (i < 8) { - t64 <- (get64 (WArray64.init8 (fun i_0 => (in_0).[i_0])) i); - state.[i] <- (state.[i] `^` t64); - i <- i + 1; - } - state <- - Array25.init - (WArray200.get64 (WArray200.set8 (WArray200.init64 (fun i_0 => (state).[i_0])) 64 (( - (get8 (WArray200.init64 (fun i_0 => (state).[i_0])) 64) `^` (W8.of_int 31))))); - state <- - Array25.init - (WArray200.get64 (WArray200.set8 (WArray200.init64 (fun i_0 => (state).[i_0])) (136 - 1) (( - (get8 (WArray200.init64 (fun i_0 => (state).[i_0])) (136 - 1)) `^` (W8.of_int 128))))); - state <@ __keccakf1600_ref (state); - outlen <- s_outlen; - out <- s_out; - - while (((W64.of_int 136) \ult outlen)) { - aux <- (136 %/ 8); - i <- 0; - while (i < aux) { - t64 <- state.[i]; - Glob.mem <- - storeW64 Glob.mem (W64.to_uint (out + (W64.of_int (8 * i)))) (t64); - i <- i + 1; - } - out <- (out + (W64.of_int 136)); - outlen <- (outlen - (W64.of_int 136)); - s_out <- out; - s_outlen <- outlen; - state <@ __keccakf1600_ref (state); - outlen <- s_outlen; - out <- s_out; - } - s_outlen <- outlen; - outlen <- (outlen `>>` (W8.of_int 3)); - j <- (W64.of_int 0); - - while ((j \ult outlen)) { - t64 <- state.[(W64.to_uint j)]; - Glob.mem <- - storeW64 Glob.mem (W64.to_uint (out + ((W64.of_int 8) * j))) (t64); - j <- (j + (W64.of_int 1)); - } - j <- (j `<<` (W8.of_int 3)); - outlen <- s_outlen; - - while ((j \ult outlen)) { - c <- - (get8 (WArray200.init64 (fun i_0 => (state).[i_0])) (W64.to_uint j)); - Glob.mem <- storeW8 Glob.mem (W64.to_uint (out + j)) (c); - j <- (j + (W64.of_int 1)); - } - return (); - } - - proc _poly_add2 (rp:W16.t Array256.t, bp:W16.t Array256.t) : W16.t Array256.t = { - - var i:W64.t; - var a:W16.t; - var b:W16.t; - var r:W16.t; - - i <- (W64.of_int 0); - - while ((i \ult (W64.of_int 256))) { - a <- rp.[(W64.to_uint i)]; - b <- bp.[(W64.to_uint i)]; - r <- (a + b); - rp.[(W64.to_uint i)] <- r; - i <- (i + (W64.of_int 1)); - } - return (rp); - } - - proc _poly_csubq (rp:W16.t Array256.t) : W16.t Array256.t = { - - var i:W64.t; - var t:W16.t; - var b:W16.t; - - i <- (W64.of_int 0); - - while ((i \ult (W64.of_int 256))) { - t <- rp.[(W64.to_uint i)]; - t <- (t - (W16.of_int 3329)); - b <- t; - b <- (b `|>>` (W8.of_int 15)); - b <- (b `&` (W16.of_int 3329)); - t <- (t + b); - rp.[(W64.to_uint i)] <- t; - i <- (i + (W64.of_int 1)); - } - return (rp); - } - - proc _poly_basemul (rp:W16.t Array256.t, ap:W16.t Array256.t, - bp:W16.t Array256.t) : W16.t Array256.t = { - - var srp:W16.t Array256.t; - var zetasctr:W64.t; - var i:W64.t; - var zetasp:W16.t Array128.t; - var zeta_0:W16.t; - var a0:W16.t; - var b0:W16.t; - var a1:W16.t; - var b1:W16.t; - var r0:W16.t; - var t:W16.t; - var r1:W16.t; - srp <- witness; - zetasp <- witness; - srp <- rp; - zetasctr <- (W64.of_int 64); - i <- (W64.of_int 0); - - while ((i \ult (W64.of_int 256))) { - zetasp <- jzetas; - zeta_0 <- zetasp.[(W64.to_uint zetasctr)]; - zetasctr <- (zetasctr + (W64.of_int 1)); - a0 <- ap.[(W64.to_uint i)]; - b0 <- bp.[(W64.to_uint i)]; - i <- (i + (W64.of_int 1)); - a1 <- ap.[(W64.to_uint i)]; - b1 <- bp.[(W64.to_uint i)]; - i <- (i - (W64.of_int 1)); - r0 <@ __fqmul (a1, b1); - r0 <@ __fqmul (r0, zeta_0); - t <@ __fqmul (a0, b0); - r0 <- (r0 + t); - r1 <@ __fqmul (a0, b1); - t <@ __fqmul (a1, b0); - r1 <- (r1 + t); - rp <- srp; - rp.[(W64.to_uint i)] <- r0; - i <- (i + (W64.of_int 1)); - rp.[(W64.to_uint i)] <- r1; - srp <- rp; - zeta_0 <- (- zeta_0); - i <- (i + (W64.of_int 1)); - a0 <- ap.[(W64.to_uint i)]; - b0 <- bp.[(W64.to_uint i)]; - i <- (i + (W64.of_int 1)); - a1 <- ap.[(W64.to_uint i)]; - b1 <- bp.[(W64.to_uint i)]; - i <- (i - (W64.of_int 1)); - r0 <@ __fqmul (a1, b1); - r0 <@ __fqmul (r0, zeta_0); - t <@ __fqmul (a0, b0); - r0 <- (r0 + t); - r1 <@ __fqmul (a0, b1); - t <@ __fqmul (a1, b0); - r1 <- (r1 + t); - rp <- srp; - rp.[(W64.to_uint i)] <- r0; - i <- (i + (W64.of_int 1)); - rp.[(W64.to_uint i)] <- r1; - srp <- rp; - i <- (i + (W64.of_int 1)); - } - return (rp); - } - - proc __poly_reduce (rp:W16.t Array256.t) : W16.t Array256.t = { - - var j:W64.t; - var t:W16.t; - - j <- (W64.of_int 0); - - while ((j \ult (W64.of_int 256))) { - t <- rp.[(W64.to_uint j)]; - t <@ __barrett_reduce (t); - rp.[(W64.to_uint j)] <- t; - j <- (j + (W64.of_int 1)); - } - return (rp); - } - - proc _poly_compress (rp:W64.t, a:W16.t Array256.t) : W16.t Array256.t = { - - var i:W64.t; - var j:W64.t; - var t:W16.t; - var d0:W32.t; - var d1:W32.t; - - a <@ _poly_csubq (a); - i <- (W64.of_int 0); - j <- (W64.of_int 0); - - while ((i \ult (W64.of_int 128))) { - t <- a.[(W64.to_uint j)]; - d0 <- (zeroextu32 t); - d0 <- (d0 `<<` (W8.of_int 4)); - d0 <- (d0 + (W32.of_int 1665)); - d0 <- (d0 * (W32.of_int 80635)); - d0 <- (d0 `>>` (W8.of_int 28)); - d0 <- (d0 `&` (W32.of_int 15)); - j <- (j + (W64.of_int 1)); - t <- a.[(W64.to_uint j)]; - d1 <- (zeroextu32 t); - d1 <- (d1 `<<` (W8.of_int 4)); - d1 <- (d1 + (W32.of_int 1665)); - d1 <- (d1 * (W32.of_int 80635)); - d1 <- (d1 `>>` (W8.of_int 28)); - d1 <- (d1 `&` (W32.of_int 15)); - d1 <- (d1 `<<` (W8.of_int 4)); - d0 <- (d0 `|` d1); - Glob.mem <- storeW8 Glob.mem (W64.to_uint (rp + i)) ((truncateu8 d0)); - i <- (i + (W64.of_int 1)); - j <- (j + (W64.of_int 1)); - } - return (a); - } - - proc _i_poly_compress (rp:W8.t Array128.t, a:W16.t Array256.t) : W8.t Array128.t * - W16.t Array256.t = { - - var i:W64.t; - var j:W64.t; - var t:W16.t; - var d0:W32.t; - var d1:W32.t; - - a <@ _poly_csubq (a); - i <- (W64.of_int 0); - j <- (W64.of_int 0); - - while ((i \ult (W64.of_int 128))) { - t <- a.[(W64.to_uint j)]; - d0 <- (zeroextu32 t); - d0 <- (d0 `<<` (W8.of_int 4)); - d0 <- (d0 + (W32.of_int 1665)); - d0 <- (d0 * (W32.of_int 80635)); - d0 <- (d0 `>>` (W8.of_int 28)); - d0 <- (d0 `&` (W32.of_int 15)); - j <- (j + (W64.of_int 1)); - t <- a.[(W64.to_uint j)]; - d1 <- (zeroextu32 t); - d1 <- (d1 `<<` (W8.of_int 4)); - d1 <- (d1 + (W32.of_int 1665)); - d1 <- (d1 * (W32.of_int 80635)); - d1 <- (d1 `>>` (W8.of_int 28)); - d1 <- (d1 `&` (W32.of_int 15)); - d1 <- (d1 `<<` (W8.of_int 4)); - d0 <- (d0 `|` d1); - rp.[(W64.to_uint i)] <- (truncateu8 d0); - i <- (i + (W64.of_int 1)); - j <- (j + (W64.of_int 1)); - } - return (rp, a); - } - - proc _poly_decompress (rp:W16.t Array256.t, ap:W64.t) : W16.t Array256.t = { - - var i:W64.t; - var j:W64.t; - var t:W8.t; - var d0:W16.t; - var d1:W16.t; - - i <- (W64.of_int 0); - j <- (W64.of_int 0); - - while ((i \ult (W64.of_int 128))) { - t <- (loadW8 Glob.mem (W64.to_uint (ap + i))); - d0 <- (zeroextu16 t); - d1 <- (zeroextu16 t); - d0 <- (d0 `&` (W16.of_int 15)); - d1 <- (d1 `>>` (W8.of_int 4)); - d0 <- (d0 * (W16.of_int 3329)); - d1 <- (d1 * (W16.of_int 3329)); - d0 <- (d0 + (W16.of_int 8)); - d1 <- (d1 + (W16.of_int 8)); - d0 <- (d0 `>>` (W8.of_int 4)); - d1 <- (d1 `>>` (W8.of_int 4)); - rp.[(W64.to_uint j)] <- d0; - j <- (j + (W64.of_int 1)); - rp.[(W64.to_uint j)] <- d1; - j <- (j + (W64.of_int 1)); - i <- (i + (W64.of_int 1)); - } - return (rp); - } - - proc _poly_frombytes (rp:W16.t Array256.t, ap:W64.t) : W16.t Array256.t = { - var aux: int; - - var i:int; - var c0:W8.t; - var c1:W8.t; - var c2:W8.t; - var d0:W16.t; - var t:W16.t; - var d1:W16.t; - - aux <- (256 %/ 2); - i <- 0; - while (i < aux) { - c0 <- (loadW8 Glob.mem (W64.to_uint (ap + (W64.of_int (3 * i))))); - c1 <- - (loadW8 Glob.mem (W64.to_uint (ap + (W64.of_int ((3 * i) + 1))))); - c2 <- - (loadW8 Glob.mem (W64.to_uint (ap + (W64.of_int ((3 * i) + 2))))); - d0 <- (zeroextu16 c0); - t <- (zeroextu16 c1); - t <- (t `&` (W16.of_int 15)); - t <- (t `<<` (W8.of_int 8)); - d0 <- (d0 `|` t); - d1 <- (zeroextu16 c2); - d1 <- (d1 `<<` (W8.of_int 4)); - t <- (zeroextu16 c1); - t <- (t `>>` (W8.of_int 4)); - d1 <- (d1 `|` t); - rp.[(2 * i)] <- d0; - rp.[((2 * i) + 1)] <- d1; - i <- i + 1; - } - return (rp); - } - - proc _poly_frommont (rp:W16.t Array256.t) : W16.t Array256.t = { - - var dmont:W16.t; - var i:W64.t; - var r:W16.t; - - dmont <- (W16.of_int 1353); - i <- (W64.of_int 0); - - while ((i \ult (W64.of_int 256))) { - r <- rp.[(W64.to_uint i)]; - r <@ __fqmul (r, dmont); - rp.[(W64.to_uint i)] <- r; - i <- (i + (W64.of_int 1)); - } - return (rp); - } - - proc _i_poly_frommsg (rp:W16.t Array256.t, ap:W8.t Array32.t) : W16.t Array256.t = { - var aux: int; - - var i:int; - var c:W8.t; - var t:W16.t; - - i <- 0; - while (i < 32) { - c <- ap.[i]; - t <- (zeroextu16 c); - t <- (t `&` (W16.of_int 1)); - t <- (t * (W16.of_int ((3329 + 1) %/ 2))); - rp.[(8 * i)] <- t; - c <- (c `>>` (W8.of_int 1)); - t <- (zeroextu16 c); - t <- (t `&` (W16.of_int 1)); - t <- (t * (W16.of_int ((3329 + 1) %/ 2))); - rp.[((8 * i) + 1)] <- t; - c <- (c `>>` (W8.of_int 1)); - t <- (zeroextu16 c); - t <- (t `&` (W16.of_int 1)); - t <- (t * (W16.of_int ((3329 + 1) %/ 2))); - rp.[((8 * i) + 2)] <- t; - c <- (c `>>` (W8.of_int 1)); - t <- (zeroextu16 c); - t <- (t `&` (W16.of_int 1)); - t <- (t * (W16.of_int ((3329 + 1) %/ 2))); - rp.[((8 * i) + 3)] <- t; - c <- (c `>>` (W8.of_int 1)); - t <- (zeroextu16 c); - t <- (t `&` (W16.of_int 1)); - t <- (t * (W16.of_int ((3329 + 1) %/ 2))); - rp.[((8 * i) + 4)] <- t; - c <- (c `>>` (W8.of_int 1)); - t <- (zeroextu16 c); - t <- (t `&` (W16.of_int 1)); - t <- (t * (W16.of_int ((3329 + 1) %/ 2))); - rp.[((8 * i) + 5)] <- t; - c <- (c `>>` (W8.of_int 1)); - t <- (zeroextu16 c); - t <- (t `&` (W16.of_int 1)); - t <- (t * (W16.of_int ((3329 + 1) %/ 2))); - rp.[((8 * i) + 6)] <- t; - c <- (c `>>` (W8.of_int 1)); - t <- (zeroextu16 c); - t <- (t `&` (W16.of_int 1)); - t <- (t * (W16.of_int ((3329 + 1) %/ 2))); - rp.[((8 * i) + 7)] <- t; - c <- (c `>>` (W8.of_int 1)); - i <- i + 1; - } - return (rp); - } - - proc _poly_getnoise (rp:W16.t Array256.t, seed:W8.t Array32.t, nonce:W8.t) : - W16.t Array256.t = { - var aux: int; - - var srp:W16.t Array256.t; - var k:int; - var c:W8.t; - var extseed:W8.t Array33.t; - var buf:W8.t Array128.t; - var i:W64.t; - var j:W64.t; - var a:W8.t; - var b:W8.t; - var t:W16.t; - buf <- witness; - extseed <- witness; - srp <- witness; - srp <- rp; - k <- 0; - while (k < 32) { - c <- seed.[k]; - extseed.[k] <- c; - k <- k + 1; - } - extseed.[32] <- nonce; - buf <@ _shake256_128_33 (buf, extseed); - rp <- srp; - i <- (W64.of_int 0); - j <- (W64.of_int 0); - - while ((i \ult (W64.of_int 128))) { - c <- buf.[(W64.to_uint i)]; - a <- c; - a <- (a `&` (W8.of_int 85)); - c <- (c `>>` (W8.of_int 1)); - c <- (c `&` (W8.of_int 85)); - c <- (c + a); - a <- c; - a <- (a `&` (W8.of_int 3)); - b <- c; - b <- (b `>>` (W8.of_int 2)); - b <- (b `&` (W8.of_int 3)); - a <- (a - b); - t <- (sigextu16 a); - rp.[(W64.to_uint j)] <- t; - a <- c; - a <- (a `>>` (W8.of_int 4)); - a <- (a `&` (W8.of_int 3)); - b <- (c `>>` (W8.of_int 6)); - b <- (b `&` (W8.of_int 3)); - a <- (a - b); - t <- (sigextu16 a); - j <- (j + (W64.of_int 1)); - rp.[(W64.to_uint j)] <- t; - i <- (i + (W64.of_int 1)); - j <- (j + (W64.of_int 1)); - } - return (rp); - } - - proc _poly_invntt (rp:W16.t Array256.t) : W16.t Array256.t = { - - var zetasp:W16.t Array128.t; - var zetasctr:W64.t; - var len:W64.t; - var start:W64.t; - var zeta_0:W16.t; - var j:W64.t; - var cmp:W64.t; - var offset:W64.t; - var s:W16.t; - var t:W16.t; - var m:W16.t; - zetasp <- witness; - zetasp <- jzetas_inv; - zetasctr <- (W64.of_int 0); - len <- (W64.of_int 2); - - while ((len \ule (W64.of_int 128))) { - start <- (W64.of_int 0); - - while ((start \ult (W64.of_int 256))) { - zeta_0 <- zetasp.[(W64.to_uint zetasctr)]; - zetasctr <- (zetasctr + (W64.of_int 1)); - j <- start; - cmp <- (start + len); - - while ((j \ult cmp)) { - offset <- (j + len); - s <- rp.[(W64.to_uint offset)]; - t <- rp.[(W64.to_uint j)]; - m <- (s + t); - m <@ __barrett_reduce (m); - rp.[(W64.to_uint j)] <- m; - t <- (t - s); - t <@ __fqmul (t, zeta_0); - rp.[(W64.to_uint offset)] <- t; - j <- (j + (W64.of_int 1)); - } - start <- (j + len); - } - len <- (len `<<` (W8.of_int 1)); - } - zeta_0 <- zetasp.[127]; - j <- (W64.of_int 0); - - while ((j \ult (W64.of_int 256))) { - t <- rp.[(W64.to_uint j)]; - t <@ __fqmul (t, zeta_0); - rp.[(W64.to_uint j)] <- t; - j <- (j + (W64.of_int 1)); - } - return (rp); - } - - proc _poly_ntt (rp:W16.t Array256.t) : W16.t Array256.t = { - - var zetasp:W16.t Array128.t; - var zetasctr:W64.t; - var len:W64.t; - var start:W64.t; - var zeta_0:W16.t; - var j:W64.t; - var cmp:W64.t; - var offset:W64.t; - var t:W16.t; - var s:W16.t; - var m:W16.t; - zetasp <- witness; - zetasp <- jzetas; - zetasctr <- (W64.of_int 0); - len <- (W64.of_int 128); - - while (((W64.of_int 2) \ule len)) { - start <- (W64.of_int 0); - - while ((start \ult (W64.of_int 256))) { - zetasctr <- (zetasctr + (W64.of_int 1)); - zeta_0 <- zetasp.[(W64.to_uint zetasctr)]; - j <- start; - cmp <- (start + len); - - while ((j \ult cmp)) { - offset <- (j + len); - t <- rp.[(W64.to_uint offset)]; - t <@ __fqmul (t, zeta_0); - s <- rp.[(W64.to_uint j)]; - m <- s; - m <- (m - t); - rp.[(W64.to_uint offset)] <- m; - t <- (t + s); - rp.[(W64.to_uint j)] <- t; - j <- (j + (W64.of_int 1)); - } - start <- (j + len); - } - len <- (len `>>` (W8.of_int 1)); - } - rp <@ __poly_reduce (rp); - return (rp); - } - - proc _poly_sub (rp:W16.t Array256.t, ap:W16.t Array256.t, - bp:W16.t Array256.t) : W16.t Array256.t = { - - var i:W64.t; - var a:W16.t; - var b:W16.t; - var r:W16.t; - - i <- (W64.of_int 0); - - while ((i \ult (W64.of_int 256))) { - a <- ap.[(W64.to_uint i)]; - b <- bp.[(W64.to_uint i)]; - r <- (a - b); - rp.[(W64.to_uint i)] <- r; - i <- (i + (W64.of_int 1)); - } - return (rp); - } - - proc _poly_tobytes (rp:W64.t, a:W16.t Array256.t) : W16.t Array256.t = { - - var i:W64.t; - var j:W64.t; - var t0:W16.t; - var t1:W16.t; - var d:W16.t; - - a <@ _poly_csubq (a); - i <- (W64.of_int 0); - j <- (W64.of_int 0); - - while ((i \ult (W64.of_int 256))) { - t0 <- a.[(W64.to_uint i)]; - i <- (i + (W64.of_int 1)); - t1 <- a.[(W64.to_uint i)]; - i <- (i + (W64.of_int 1)); - d <- t0; - d <- (d `&` (W16.of_int 255)); - Glob.mem <- storeW8 Glob.mem (W64.to_uint (rp + j)) ((truncateu8 d)); - j <- (j + (W64.of_int 1)); - t0 <- (t0 `>>` (W8.of_int 8)); - d <- t1; - d <- (d `&` (W16.of_int 15)); - d <- (d `<<` (W8.of_int 4)); - d <- (d `|` t0); - Glob.mem <- storeW8 Glob.mem (W64.to_uint (rp + j)) ((truncateu8 d)); - j <- (j + (W64.of_int 1)); - t1 <- (t1 `>>` (W8.of_int 4)); - Glob.mem <- storeW8 Glob.mem (W64.to_uint (rp + j)) ((truncateu8 t1)); - j <- (j + (W64.of_int 1)); - } - return (a); - } - - proc _i_poly_tomsg (rp:W8.t Array32.t, a:W16.t Array256.t) : W8.t Array32.t * - W16.t Array256.t = { - var aux: int; - - var r:W8.t; - var j:int; - var i:int; - var t:W16.t; - var d:W32.t; - - a <@ _poly_csubq (a); - i <- 0; - while (i < 32) { - r <- (W8.of_int 0); - j <- 0; - while (j < 8) { - t <- a.[((8 * i) + j)]; - d <- (zeroextu32 t); - d <- (d `<<` (W8.of_int 1)); - d <- (d + (W32.of_int 1665)); - d <- (d * (W32.of_int 80635)); - d <- (d `>>` (W8.of_int 28)); - d <- (d `&` (W32.of_int 1)); - d <- (d `<<` (W8.of_int j)); - r <- (r `|` (truncateu8 d)); - j <- j + 1; - } - rp.[i] <- r; - i <- i + 1; - } - return (rp, a); - } - - proc __polyvec_add2 (r:W16.t Array768.t, b:W16.t Array768.t) : W16.t Array768.t = { - var aux: W16.t Array256.t; - - - - aux <@ _poly_add2 ((Array256.init (fun i => r.[0 + i])), - (Array256.init (fun i => b.[0 + i]))); - r <- Array768.init - (fun i => if 0 <= i < 0 + 256 then aux.[i-0] else r.[i]); - aux <@ _poly_add2 ((Array256.init (fun i => r.[256 + i])), - (Array256.init (fun i => b.[256 + i]))); - r <- Array768.init - (fun i => if 256 <= i < 256 + 256 then aux.[i-256] else r.[i]); - aux <@ _poly_add2 ((Array256.init (fun i => r.[(2 * 256) + i])), - (Array256.init (fun i => b.[(2 * 256) + i]))); - r <- Array768.init - (fun i => if (2 * 256) <= i < (2 * 256) + 256 then aux.[i-(2 * 256)] - else r.[i]); - return (r); - } - - proc __polyvec_csubq (r:W16.t Array768.t) : W16.t Array768.t = { - var aux: W16.t Array256.t; - - - - aux <@ _poly_csubq ((Array256.init (fun i => r.[0 + i]))); - r <- Array768.init - (fun i => if 0 <= i < 0 + 256 then aux.[i-0] else r.[i]); - aux <@ _poly_csubq ((Array256.init (fun i => r.[256 + i]))); - r <- Array768.init - (fun i => if 256 <= i < 256 + 256 then aux.[i-256] else r.[i]); - aux <@ _poly_csubq ((Array256.init (fun i => r.[(2 * 256) + i]))); - r <- Array768.init - (fun i => if (2 * 256) <= i < (2 * 256) + 256 then aux.[i-(2 * 256)] - else r.[i]); - return (r); - } - - proc __polyvec_compress (rp:W64.t, a:W16.t Array768.t) : unit = { - var aux: int; - - var i:W64.t; - var j:W64.t; - var aa:W16.t Array768.t; - var k:int; - var t:W64.t Array4.t; - var c:W16.t; - var b:W16.t; - aa <- witness; - t <- witness; - i <- (W64.of_int 0); - j <- (W64.of_int 0); - aa <@ __polyvec_csubq (a); - - while ((i \ult (W64.of_int (3 * 256)))) { - k <- 0; - while (k < 4) { - t.[k] <- (zeroextu64 aa.[(W64.to_uint i)]); - i <- (i + (W64.of_int 1)); - t.[k] <- (t.[k] `<<` (W8.of_int 10)); - t.[k] <- (t.[k] + (W64.of_int 1665)); - t.[k] <- (t.[k] * (W64.of_int 1290167)); - t.[k] <- (t.[k] `>>` (W8.of_int 32)); - t.[k] <- (t.[k] `&` (W64.of_int 1023)); - k <- k + 1; - } - c <- (truncateu16 t.[0]); - c <- (c `&` (W16.of_int 255)); - Glob.mem <- storeW8 Glob.mem (W64.to_uint (rp + j)) ((truncateu8 c)); - j <- (j + (W64.of_int 1)); - b <- (truncateu16 t.[0]); - b <- (b `>>` (W8.of_int 8)); - c <- (truncateu16 t.[1]); - c <- (c `<<` (W8.of_int 2)); - c <- (c `|` b); - Glob.mem <- storeW8 Glob.mem (W64.to_uint (rp + j)) ((truncateu8 c)); - j <- (j + (W64.of_int 1)); - b <- (truncateu16 t.[1]); - b <- (b `>>` (W8.of_int 6)); - c <- (truncateu16 t.[2]); - c <- (c `<<` (W8.of_int 4)); - c <- (c `|` b); - Glob.mem <- storeW8 Glob.mem (W64.to_uint (rp + j)) ((truncateu8 c)); - j <- (j + (W64.of_int 1)); - b <- (truncateu16 t.[2]); - b <- (b `>>` (W8.of_int 4)); - c <- (truncateu16 t.[3]); - c <- (c `<<` (W8.of_int 6)); - c <- (c `|` b); - Glob.mem <- storeW8 Glob.mem (W64.to_uint (rp + j)) ((truncateu8 c)); - j <- (j + (W64.of_int 1)); - t.[3] <- (t.[3] `>>` (W8.of_int 2)); - Glob.mem <- - storeW8 Glob.mem (W64.to_uint (rp + j)) ((truncateu8 t.[3])); - j <- (j + (W64.of_int 1)); - } - return (); - } - - proc __i_polyvec_compress (rp:W8.t Array960.t, a:W16.t Array768.t) : - W8.t Array960.t = { - var aux: int; - - var i:W64.t; - var j:W64.t; - var aa:W16.t Array768.t; - var k:int; - var t:W64.t Array4.t; - var c:W16.t; - var b:W16.t; - aa <- witness; - t <- witness; - i <- (W64.of_int 0); - j <- (W64.of_int 0); - aa <@ __polyvec_csubq (a); - - while ((i \ult (W64.of_int (3 * 256)))) { - k <- 0; - while (k < 4) { - t.[k] <- (zeroextu64 aa.[(W64.to_uint i)]); - i <- (i + (W64.of_int 1)); - t.[k] <- (t.[k] `<<` (W8.of_int 10)); - t.[k] <- (t.[k] + (W64.of_int 1665)); - t.[k] <- (t.[k] * (W64.of_int 1290167)); - t.[k] <- (t.[k] `>>` (W8.of_int 32)); - t.[k] <- (t.[k] `&` (W64.of_int 1023)); - k <- k + 1; - } - c <- (truncateu16 t.[0]); - c <- (c `&` (W16.of_int 255)); - rp.[(W64.to_uint j)] <- (truncateu8 c); - j <- (j + (W64.of_int 1)); - b <- (truncateu16 t.[0]); - b <- (b `>>` (W8.of_int 8)); - c <- (truncateu16 t.[1]); - c <- (c `<<` (W8.of_int 2)); - c <- (c `|` b); - rp.[(W64.to_uint j)] <- (truncateu8 c); - j <- (j + (W64.of_int 1)); - b <- (truncateu16 t.[1]); - b <- (b `>>` (W8.of_int 6)); - c <- (truncateu16 t.[2]); - c <- (c `<<` (W8.of_int 4)); - c <- (c `|` b); - rp.[(W64.to_uint j)] <- (truncateu8 c); - j <- (j + (W64.of_int 1)); - b <- (truncateu16 t.[2]); - b <- (b `>>` (W8.of_int 4)); - c <- (truncateu16 t.[3]); - c <- (c `<<` (W8.of_int 6)); - c <- (c `|` b); - rp.[(W64.to_uint j)] <- (truncateu8 c); - j <- (j + (W64.of_int 1)); - t.[3] <- (t.[3] `>>` (W8.of_int 2)); - rp.[(W64.to_uint j)] <- (truncateu8 t.[3]); - j <- (j + (W64.of_int 1)); - } - return (rp); - } - - proc __polyvec_decompress (ap:W64.t) : W16.t Array768.t = { - var aux: int; - - var r:W16.t Array768.t; - var i:W64.t; - var j:W64.t; - var k:int; - var t:W32.t Array5.t; - var d:W32.t; - r <- witness; - t <- witness; - i <- (W64.of_int 0); - j <- (W64.of_int 0); - - while ((i \ult (W64.of_int (3 * 256)))) { - k <- 0; - while (k < 5) { - t.[k] <- (zeroextu32 (loadW8 Glob.mem (W64.to_uint (ap + j)))); - j <- (j + (W64.of_int 1)); - k <- k + 1; - } - d <- t.[1]; - t.[1] <- (t.[1] `>>` (W8.of_int 2)); - d <- (d `&` (W32.of_int 3)); - d <- (d `<<` (W8.of_int 8)); - t.[0] <- (t.[0] `|` d); - d <- t.[2]; - t.[2] <- (t.[2] `>>` (W8.of_int 4)); - d <- (d `&` (W32.of_int 15)); - d <- (d `<<` (W8.of_int 6)); - t.[1] <- (t.[1] `|` d); - d <- t.[3]; - t.[3] <- (t.[3] `>>` (W8.of_int 6)); - d <- (d `&` (W32.of_int 63)); - d <- (d `<<` (W8.of_int 4)); - t.[2] <- (t.[2] `|` d); - d <- t.[4]; - d <- (d `<<` (W8.of_int 2)); - t.[3] <- (t.[3] `|` d); - k <- 0; - while (k < 4) { - t.[k] <- (t.[k] * (W32.of_int 3329)); - t.[k] <- (t.[k] + (W32.of_int 512)); - t.[k] <- (t.[k] `>>` (W8.of_int 10)); - r.[(W64.to_uint i)] <- (truncateu16 t.[k]); - i <- (i + (W64.of_int 1)); - k <- k + 1; - } - } - return (r); - } - - proc __polyvec_frombytes (ap:W64.t) : W16.t Array768.t = { - var aux: W16.t Array256.t; - - var r:W16.t Array768.t; - var pp:W64.t; - r <- witness; - pp <- ap; - aux <@ _poly_frombytes ((Array256.init (fun i => r.[0 + i])), pp); - r <- Array768.init - (fun i => if 0 <= i < 0 + 256 then aux.[i-0] else r.[i]); - pp <- (pp + (W64.of_int 384)); - aux <@ _poly_frombytes ((Array256.init (fun i => r.[256 + i])), pp); - r <- Array768.init - (fun i => if 256 <= i < 256 + 256 then aux.[i-256] else r.[i]); - pp <- (pp + (W64.of_int 384)); - aux <@ _poly_frombytes ((Array256.init (fun i => r.[(2 * 256) + i])), - pp); - r <- Array768.init - (fun i => if (2 * 256) <= i < (2 * 256) + 256 then aux.[i-(2 * 256)] - else r.[i]); - return (r); - } - - proc __polyvec_invntt (r:W16.t Array768.t) : W16.t Array768.t = { - var aux: W16.t Array256.t; - - - - aux <@ _poly_invntt ((Array256.init (fun i => r.[0 + i]))); - r <- Array768.init - (fun i => if 0 <= i < 0 + 256 then aux.[i-0] else r.[i]); - aux <@ _poly_invntt ((Array256.init (fun i => r.[256 + i]))); - r <- Array768.init - (fun i => if 256 <= i < 256 + 256 then aux.[i-256] else r.[i]); - aux <@ _poly_invntt ((Array256.init (fun i => r.[(2 * 256) + i]))); - r <- Array768.init - (fun i => if (2 * 256) <= i < (2 * 256) + 256 then aux.[i-(2 * 256)] - else r.[i]); - return (r); - } - - proc __polyvec_ntt (r:W16.t Array768.t) : W16.t Array768.t = { - var aux: W16.t Array256.t; - - - - aux <@ _poly_ntt ((Array256.init (fun i => r.[0 + i]))); - r <- Array768.init - (fun i => if 0 <= i < 0 + 256 then aux.[i-0] else r.[i]); - aux <@ _poly_ntt ((Array256.init (fun i => r.[256 + i]))); - r <- Array768.init - (fun i => if 256 <= i < 256 + 256 then aux.[i-256] else r.[i]); - aux <@ _poly_ntt ((Array256.init (fun i => r.[(2 * 256) + i]))); - r <- Array768.init - (fun i => if (2 * 256) <= i < (2 * 256) + 256 then aux.[i-(2 * 256)] - else r.[i]); - return (r); - } - - proc __polyvec_pointwise_acc (a:W16.t Array768.t, b:W16.t Array768.t) : - W16.t Array256.t = { - - var r:W16.t Array256.t; - var t:W16.t Array256.t; - r <- witness; - t <- witness; - r <@ _poly_basemul (r, (Array256.init (fun i => a.[0 + i])), - (Array256.init (fun i => b.[0 + i]))); - t <@ _poly_basemul (t, (Array256.init (fun i => a.[256 + i])), - (Array256.init (fun i => b.[256 + i]))); - r <@ _poly_add2 (r, t); - t <@ _poly_basemul (t, (Array256.init (fun i => a.[(2 * 256) + i])), - (Array256.init (fun i => b.[(2 * 256) + i]))); - r <@ _poly_add2 (r, t); - r <@ __poly_reduce (r); - return (r); - } - - proc __polyvec_reduce (r:W16.t Array768.t) : W16.t Array768.t = { - var aux: W16.t Array256.t; - - - - aux <@ __poly_reduce ((Array256.init (fun i => r.[0 + i]))); - r <- Array768.init - (fun i => if 0 <= i < 0 + 256 then aux.[i-0] else r.[i]); - aux <@ __poly_reduce ((Array256.init (fun i => r.[256 + i]))); - r <- Array768.init - (fun i => if 256 <= i < 256 + 256 then aux.[i-256] else r.[i]); - aux <@ __poly_reduce ((Array256.init (fun i => r.[(2 * 256) + i]))); - r <- Array768.init - (fun i => if (2 * 256) <= i < (2 * 256) + 256 then aux.[i-(2 * 256)] - else r.[i]); - return (r); - } - - proc __polyvec_tobytes (rp:W64.t, a:W16.t Array768.t) : unit = { - var aux: W16.t Array256.t; - - var pp:W64.t; - - pp <- rp; - aux <@ _poly_tobytes (pp, (Array256.init (fun i => a.[0 + i]))); - a <- Array768.init - (fun i => if 0 <= i < 0 + 256 then aux.[i-0] else a.[i]); - pp <- (pp + (W64.of_int 384)); - aux <@ _poly_tobytes (pp, (Array256.init (fun i => a.[256 + i]))); - a <- Array768.init - (fun i => if 256 <= i < 256 + 256 then aux.[i-256] else a.[i]); - pp <- (pp + (W64.of_int 384)); - aux <@ _poly_tobytes (pp, (Array256.init (fun i => a.[(2 * 256) + i]))); - a <- Array768.init - (fun i => if (2 * 256) <= i < (2 * 256) + 256 then aux.[i-(2 * 256)] - else a.[i]); - return (); - } - - proc __rej_uniform (rp:W16.t Array256.t, offset:W64.t, buf:W8.t Array168.t) : - W64.t * W16.t Array256.t = { - - var ctr:W64.t; - var pos:W64.t; - var exit:W64.t; - var val1:W16.t; - var t:W16.t; - var val2:W16.t; - var cnd0:W64.t; - var cnd1:W64.t; - - ctr <- offset; - pos <- (W64.of_int 0); - exit <- (W64.of_int 0); - - while ((exit = (W64.of_int 0))) { - val1 <- (zeroextu16 buf.[(W64.to_uint pos)]); - pos <- (pos + (W64.of_int 1)); - t <- (zeroextu16 buf.[(W64.to_uint pos)]); - val2 <- t; - val2 <- (val2 `>>` (W8.of_int 4)); - t <- (t `&` (W16.of_int 15)); - t <- (t `<<` (W8.of_int 8)); - val1 <- (val1 `|` t); - pos <- (pos + (W64.of_int 1)); - t <- (zeroextu16 buf.[(W64.to_uint pos)]); - t <- (t `<<` (W8.of_int 4)); - val2 <- (val2 `|` t); - pos <- (pos + (W64.of_int 1)); - if ((val1 \ult (W16.of_int 3329))) { - rp.[(W64.to_uint ctr)] <- val1; - ctr <- (ctr + (W64.of_int 1)); - } else { - - } - if ((val2 \ult (W16.of_int 3329))) { - if ((ctr \ult (W64.of_int 256))) { - rp.[(W64.to_uint ctr)] <- val2; - ctr <- (ctr + (W64.of_int 1)); - } else { - - } - } else { - - } - cnd0 <- (W64.of_int 256); - cnd0 <- (cnd0 - ctr); - cnd0 <- (cnd0 - (W64.of_int 1)); - cnd1 <- (W64.of_int 168); - cnd1 <- (cnd1 - pos); - cnd1 <- (cnd1 - (W64.of_int 3)); - exit <- (cnd0 `|` cnd1); - exit <- (exit `>>` (W8.of_int 63)); - } - return (ctr, rp); - } - - proc __gen_matrix (seed:W8.t Array32.t, transposed:W64.t) : W16.t Array2304.t = { - var aux: int; - - var r:W16.t Array2304.t; - var stransposed:W64.t; - var j:int; - var c:W8.t; - var extseed:W8.t Array34.t; - var i:int; - var state:W64.t Array25.t; - var ctr:W64.t; - var sctr:W64.t; - var buf:W8.t Array168.t; - var poly:W16.t Array256.t; - var k:W64.t; - var l:W64.t; - var t:W16.t; - buf <- witness; - extseed <- witness; - poly <- witness; - r <- witness; - state <- witness; - stransposed <- transposed; - j <- 0; - while (j < 32) { - c <- seed.[j]; - extseed.[j] <- c; - j <- j + 1; - } - i <- 0; - while (i < 3) { - j <- 0; - while (j < 3) { - transposed <- stransposed; - if ((transposed = (W64.of_int 0))) { - extseed.[32] <- (W8.of_int j); - extseed.[(32 + 1)] <- (W8.of_int i); - } else { - extseed.[32] <- (W8.of_int i); - extseed.[(32 + 1)] <- (W8.of_int j); - } - state <@ _shake128_absorb34 (state, extseed); - ctr <- (W64.of_int 0); - - while ((ctr \ult (W64.of_int 256))) { - sctr <- ctr; - (state, buf) <@ _shake128_squeezeblock (state, buf); - ctr <- sctr; - (ctr, poly) <@ __rej_uniform (poly, ctr, buf); - } - k <- (W64.of_int 0); - l <- (W64.of_int ((i * (3 * 256)) + (j * 256))); - - while ((k \ult (W64.of_int 256))) { - t <- poly.[(W64.to_uint k)]; - r.[(W64.to_uint l)] <- t; - k <- (k + (W64.of_int 1)); - l <- (l + (W64.of_int 1)); - } - j <- j + 1; - } - i <- i + 1; - } - return (r); - } - - proc __indcpa_keypair (pkp:W64.t, skp:W64.t, randomnessp:W8.t Array32.t) : unit = { - var aux: int; - var aux_0: W16.t Array256.t; - - var spkp:W64.t; - var sskp:W64.t; - var i:int; - var t64:W64.t; - var inbuf:W8.t Array32.t; - var buf:W8.t Array64.t; - var publicseed:W8.t Array32.t; - var noiseseed:W8.t Array32.t; - var zero:W64.t; - var a:W16.t Array2304.t; - var nonce:W8.t; - var skpv:W16.t Array768.t; - var e:W16.t Array768.t; - var pkpv:W16.t Array768.t; - a <- witness; - buf <- witness; - e <- witness; - inbuf <- witness; - noiseseed <- witness; - pkpv <- witness; - publicseed <- witness; - skpv <- witness; - spkp <- pkp; - sskp <- skp; - aux <- (32 %/ 8); - i <- 0; - while (i < aux) { - t64 <- (get64 (WArray32.init8 (fun i_0 => (randomnessp).[i_0])) i); - inbuf <- - Array32.init - (WArray32.get8 (WArray32.set64 (WArray32.init8 (fun i_0 => (inbuf).[i_0])) i (t64))); - i <- i + 1; - } - buf <@ _sha3512_32 (buf, inbuf); - aux <- (32 %/ 8); - i <- 0; - while (i < aux) { - t64 <- (get64 (WArray64.init8 (fun i_0 => (buf).[i_0])) i); - publicseed <- - Array32.init - (WArray32.get8 (WArray32.set64 (WArray32.init8 (fun i_0 => (publicseed).[i_0])) i (t64))); - t64 <- - (get64 (WArray64.init8 (fun i_0 => (buf).[i_0])) (i + (32 %/ 8))); - noiseseed <- - Array32.init - (WArray32.get8 (WArray32.set64 (WArray32.init8 (fun i_0 => (noiseseed).[i_0])) i (t64))); - i <- i + 1; - } - zero <- (W64.of_int 0); - a <@ __gen_matrix (publicseed, zero); - nonce <- (W8.of_int 0); - aux_0 <@ _poly_getnoise ((Array256.init (fun i_0 => skpv.[0 + i_0])), - noiseseed, nonce); - skpv <- Array768.init - (fun i_0 => if 0 <= i_0 < 0 + 256 then aux_0.[i_0-0] - else skpv.[i_0]); - nonce <- (W8.of_int 1); - aux_0 <@ _poly_getnoise ((Array256.init (fun i_0 => skpv.[256 + i_0])), - noiseseed, nonce); - skpv <- Array768.init - (fun i_0 => if 256 <= i_0 < 256 + 256 then aux_0.[i_0-256] - else skpv.[i_0]); - nonce <- (W8.of_int 2); - aux_0 <@ _poly_getnoise ((Array256.init (fun i_0 => skpv.[(2 * 256) + i_0])), - noiseseed, nonce); - skpv <- Array768.init - (fun i_0 => if (2 * 256) <= i_0 < (2 * 256) + 256 - then aux_0.[i_0-(2 * 256)] else skpv.[i_0]); - nonce <- (W8.of_int 3); - aux_0 <@ _poly_getnoise ((Array256.init (fun i_0 => e.[0 + i_0])), - noiseseed, nonce); - e <- Array768.init - (fun i_0 => if 0 <= i_0 < 0 + 256 then aux_0.[i_0-0] else e.[i_0]); - nonce <- (W8.of_int 4); - aux_0 <@ _poly_getnoise ((Array256.init (fun i_0 => e.[256 + i_0])), - noiseseed, nonce); - e <- Array768.init - (fun i_0 => if 256 <= i_0 < 256 + 256 then aux_0.[i_0-256] - else e.[i_0]); - nonce <- (W8.of_int 5); - aux_0 <@ _poly_getnoise ((Array256.init (fun i_0 => e.[(2 * 256) + i_0])), - noiseseed, nonce); - e <- Array768.init - (fun i_0 => if (2 * 256) <= i_0 < (2 * 256) + 256 - then aux_0.[i_0-(2 * 256)] else e.[i_0]); - skpv <@ __polyvec_ntt (skpv); - e <@ __polyvec_ntt (e); - aux_0 <@ __polyvec_pointwise_acc ((Array768.init (fun i_0 => a.[0 + i_0])), - skpv); - pkpv <- Array768.init - (fun i_0 => if 0 <= i_0 < 0 + 256 then aux_0.[i_0-0] - else pkpv.[i_0]); - aux_0 <@ _poly_frommont ((Array256.init (fun i_0 => pkpv.[0 + i_0]))); - pkpv <- Array768.init - (fun i_0 => if 0 <= i_0 < 0 + 256 then aux_0.[i_0-0] - else pkpv.[i_0]); - aux_0 <@ __polyvec_pointwise_acc ((Array768.init (fun i_0 => a.[(3 * 256) + i_0])), - skpv); - pkpv <- Array768.init - (fun i_0 => if 256 <= i_0 < 256 + 256 then aux_0.[i_0-256] - else pkpv.[i_0]); - aux_0 <@ _poly_frommont ((Array256.init (fun i_0 => pkpv.[256 + i_0]))); - pkpv <- Array768.init - (fun i_0 => if 256 <= i_0 < 256 + 256 then aux_0.[i_0-256] - else pkpv.[i_0]); - aux_0 <@ __polyvec_pointwise_acc ((Array768.init (fun i_0 => a.[(2 * (3 * 256)) + i_0])), - skpv); - pkpv <- Array768.init - (fun i_0 => if (2 * 256) <= i_0 < (2 * 256) + 256 - then aux_0.[i_0-(2 * 256)] else pkpv.[i_0]); - aux_0 <@ _poly_frommont ((Array256.init (fun i_0 => pkpv.[(2 * 256) + i_0]))); - pkpv <- Array768.init - (fun i_0 => if (2 * 256) <= i_0 < (2 * 256) + 256 - then aux_0.[i_0-(2 * 256)] else pkpv.[i_0]); - pkpv <@ __polyvec_add2 (pkpv, e); - pkpv <@ __polyvec_reduce (pkpv); - pkp <- spkp; - skp <- sskp; - __polyvec_tobytes (skp, skpv); - __polyvec_tobytes (pkp, pkpv); - pkp <- (pkp + (W64.of_int (3 * 384))); - aux <- (32 %/ 8); - i <- 0; - while (i < aux) { - t64 <- (get64 (WArray32.init8 (fun i_0 => (publicseed).[i_0])) i); - Glob.mem <- - storeW64 Glob.mem (W64.to_uint (pkp + (W64.of_int 0))) (t64); - pkp <- (pkp + (W64.of_int 8)); - i <- i + 1; - } - return (); - } - - proc __indcpa_enc (sctp:W64.t, msgp:W8.t Array32.t, pkp:W64.t, - noiseseed:W8.t Array32.t) : unit = { - var aux: W16.t Array256.t; - - var pkpv:W16.t Array768.t; - var i:W64.t; - var t64:W64.t; - var publicseed:W8.t Array32.t; - var k:W16.t Array256.t; - var aat:W16.t Array2304.t; - var nonce:W8.t; - var sp_0:W16.t Array768.t; - var ep:W16.t Array768.t; - var epp:W16.t Array256.t; - var bp:W16.t Array768.t; - var v:W16.t Array256.t; - var ctp:W64.t; - aat <- witness; - bp <- witness; - ep <- witness; - epp <- witness; - k <- witness; - pkpv <- witness; - publicseed <- witness; - sp_0 <- witness; - v <- witness; - pkpv <@ __polyvec_frombytes (pkp); - i <- (W64.of_int 0); - pkp <- (pkp + (W64.of_int (3 * 384))); - - while ((i \ult (W64.of_int (32 %/ 8)))) { - t64 <- (loadW64 Glob.mem (W64.to_uint (pkp + (W64.of_int 0)))); - publicseed <- - Array32.init - (WArray32.get8 (WArray32.set64_direct (WArray32.init8 (fun i_0 => (publicseed).[i_0])) (8 * (W64.to_uint i)) (t64))); - pkp <- (pkp + (W64.of_int 8)); - i <- (i + (W64.of_int 1)); - } - k <@ _i_poly_frommsg (k, msgp); - aat <@ __gen_matrix (publicseed, (W64.of_int 1)); - nonce <- (W8.of_int 0); - aux <@ _poly_getnoise ((Array256.init (fun i_0 => sp_0.[0 + i_0])), - noiseseed, nonce); - sp_0 <- Array768.init - (fun i_0 => if 0 <= i_0 < 0 + 256 then aux.[i_0-0] - else sp_0.[i_0]); - nonce <- (W8.of_int 1); - aux <@ _poly_getnoise ((Array256.init (fun i_0 => sp_0.[256 + i_0])), - noiseseed, nonce); - sp_0 <- Array768.init - (fun i_0 => if 256 <= i_0 < 256 + 256 then aux.[i_0-256] - else sp_0.[i_0]); - nonce <- (W8.of_int 2); - aux <@ _poly_getnoise ((Array256.init (fun i_0 => sp_0.[(2 * 256) + i_0])), - noiseseed, nonce); - sp_0 <- Array768.init - (fun i_0 => if (2 * 256) <= i_0 < (2 * 256) + 256 - then aux.[i_0-(2 * 256)] else sp_0.[i_0]); - nonce <- (W8.of_int 3); - aux <@ _poly_getnoise ((Array256.init (fun i_0 => ep.[0 + i_0])), - noiseseed, nonce); - ep <- Array768.init - (fun i_0 => if 0 <= i_0 < 0 + 256 then aux.[i_0-0] else ep.[i_0]); - nonce <- (W8.of_int 4); - aux <@ _poly_getnoise ((Array256.init (fun i_0 => ep.[256 + i_0])), - noiseseed, nonce); - ep <- Array768.init - (fun i_0 => if 256 <= i_0 < 256 + 256 then aux.[i_0-256] - else ep.[i_0]); - nonce <- (W8.of_int 5); - aux <@ _poly_getnoise ((Array256.init (fun i_0 => ep.[(2 * 256) + i_0])), - noiseseed, nonce); - ep <- Array768.init - (fun i_0 => if (2 * 256) <= i_0 < (2 * 256) + 256 - then aux.[i_0-(2 * 256)] else ep.[i_0]); - nonce <- (W8.of_int 6); - epp <@ _poly_getnoise (epp, noiseseed, nonce); - sp_0 <@ __polyvec_ntt (sp_0); - aux <@ __polyvec_pointwise_acc ((Array768.init (fun i_0 => aat.[0 + i_0])), - sp_0); - bp <- Array768.init - (fun i_0 => if 0 <= i_0 < 0 + 256 then aux.[i_0-0] else bp.[i_0]); - aux <@ __polyvec_pointwise_acc ((Array768.init (fun i_0 => aat.[(3 * 256) + i_0])), - sp_0); - bp <- Array768.init - (fun i_0 => if 256 <= i_0 < 256 + 256 then aux.[i_0-256] - else bp.[i_0]); - aux <@ __polyvec_pointwise_acc ((Array768.init (fun i_0 => aat.[(2 * (3 * 256)) + i_0])), - sp_0); - bp <- Array768.init - (fun i_0 => if (2 * 256) <= i_0 < (2 * 256) + 256 - then aux.[i_0-(2 * 256)] else bp.[i_0]); - v <@ __polyvec_pointwise_acc (pkpv, sp_0); - bp <@ __polyvec_invntt (bp); - v <@ _poly_invntt (v); - bp <@ __polyvec_add2 (bp, ep); - v <@ _poly_add2 (v, epp); - v <@ _poly_add2 (v, k); - bp <@ __polyvec_reduce (bp); - v <@ __poly_reduce (v); - ctp <- sctp; - __polyvec_compress (ctp, bp); - ctp <- (ctp + (W64.of_int (3 * 320))); - v <@ _poly_compress (ctp, v); - return (); - } - - proc __iindcpa_enc (ctp:W8.t Array1088.t, msgp:W8.t Array32.t, pkp:W64.t, - noiseseed:W8.t Array32.t) : W8.t Array1088.t = { - var aux_1: W8.t Array128.t; - var aux_0: W8.t Array960.t; - var aux: W16.t Array256.t; - - var sctp:W8.t Array1088.t; - var pkpv:W16.t Array768.t; - var i:W64.t; - var t64:W64.t; - var publicseed:W8.t Array32.t; - var k:W16.t Array256.t; - var aat:W16.t Array2304.t; - var nonce:W8.t; - var sp_0:W16.t Array768.t; - var ep:W16.t Array768.t; - var epp:W16.t Array256.t; - var bp:W16.t Array768.t; - var v:W16.t Array256.t; - aat <- witness; - bp <- witness; - ep <- witness; - epp <- witness; - k <- witness; - pkpv <- witness; - publicseed <- witness; - sctp <- witness; - sp_0 <- witness; - v <- witness; - sctp <- ctp; - pkpv <@ __polyvec_frombytes (pkp); - i <- (W64.of_int 0); - pkp <- (pkp + (W64.of_int (3 * 384))); - - while ((i \ult (W64.of_int (32 %/ 8)))) { - t64 <- (loadW64 Glob.mem (W64.to_uint (pkp + (W64.of_int 0)))); - publicseed <- - Array32.init - (WArray32.get8 (WArray32.set64_direct (WArray32.init8 (fun i_0 => (publicseed).[i_0])) (8 * (W64.to_uint i)) (t64))); - pkp <- (pkp + (W64.of_int 8)); - i <- (i + (W64.of_int 1)); - } - k <@ _i_poly_frommsg (k, msgp); - aat <@ __gen_matrix (publicseed, (W64.of_int 1)); - nonce <- (W8.of_int 0); - aux <@ _poly_getnoise ((Array256.init (fun i_0 => sp_0.[0 + i_0])), - noiseseed, nonce); - sp_0 <- Array768.init - (fun i_0 => if 0 <= i_0 < 0 + 256 then aux.[i_0-0] - else sp_0.[i_0]); - nonce <- (W8.of_int 1); - aux <@ _poly_getnoise ((Array256.init (fun i_0 => sp_0.[256 + i_0])), - noiseseed, nonce); - sp_0 <- Array768.init - (fun i_0 => if 256 <= i_0 < 256 + 256 then aux.[i_0-256] - else sp_0.[i_0]); - nonce <- (W8.of_int 2); - aux <@ _poly_getnoise ((Array256.init (fun i_0 => sp_0.[(2 * 256) + i_0])), - noiseseed, nonce); - sp_0 <- Array768.init - (fun i_0 => if (2 * 256) <= i_0 < (2 * 256) + 256 - then aux.[i_0-(2 * 256)] else sp_0.[i_0]); - nonce <- (W8.of_int 3); - aux <@ _poly_getnoise ((Array256.init (fun i_0 => ep.[0 + i_0])), - noiseseed, nonce); - ep <- Array768.init - (fun i_0 => if 0 <= i_0 < 0 + 256 then aux.[i_0-0] else ep.[i_0]); - nonce <- (W8.of_int 4); - aux <@ _poly_getnoise ((Array256.init (fun i_0 => ep.[256 + i_0])), - noiseseed, nonce); - ep <- Array768.init - (fun i_0 => if 256 <= i_0 < 256 + 256 then aux.[i_0-256] - else ep.[i_0]); - nonce <- (W8.of_int 5); - aux <@ _poly_getnoise ((Array256.init (fun i_0 => ep.[(2 * 256) + i_0])), - noiseseed, nonce); - ep <- Array768.init - (fun i_0 => if (2 * 256) <= i_0 < (2 * 256) + 256 - then aux.[i_0-(2 * 256)] else ep.[i_0]); - nonce <- (W8.of_int 6); - epp <@ _poly_getnoise (epp, noiseseed, nonce); - sp_0 <@ __polyvec_ntt (sp_0); - aux <@ __polyvec_pointwise_acc ((Array768.init (fun i_0 => aat.[0 + i_0])), - sp_0); - bp <- Array768.init - (fun i_0 => if 0 <= i_0 < 0 + 256 then aux.[i_0-0] else bp.[i_0]); - aux <@ __polyvec_pointwise_acc ((Array768.init (fun i_0 => aat.[(3 * 256) + i_0])), - sp_0); - bp <- Array768.init - (fun i_0 => if 256 <= i_0 < 256 + 256 then aux.[i_0-256] - else bp.[i_0]); - aux <@ __polyvec_pointwise_acc ((Array768.init (fun i_0 => aat.[(2 * (3 * 256)) + i_0])), - sp_0); - bp <- Array768.init - (fun i_0 => if (2 * 256) <= i_0 < (2 * 256) + 256 - then aux.[i_0-(2 * 256)] else bp.[i_0]); - v <@ __polyvec_pointwise_acc (pkpv, sp_0); - bp <@ __polyvec_invntt (bp); - v <@ _poly_invntt (v); - bp <@ __polyvec_add2 (bp, ep); - v <@ _poly_add2 (v, epp); - v <@ _poly_add2 (v, k); - bp <@ __polyvec_reduce (bp); - v <@ __poly_reduce (v); - ctp <- sctp; - aux_0 <@ __i_polyvec_compress ((Array960.init (fun i_0 => ctp.[0 + i_0])), - bp); - ctp <- Array1088.init - (fun i_0 => if 0 <= i_0 < 0 + 960 then aux_0.[i_0-0] - else ctp.[i_0]); - (aux_1, - aux) <@ _i_poly_compress ((Array128.init (fun i_0 => ctp.[(3 * 320) + i_0])), - v); - ctp <- Array1088.init - (fun i_0 => if (3 * 320) <= i_0 < (3 * 320) + 128 - then aux_1.[i_0-(3 * 320)] else ctp.[i_0]); - v <- aux; - return (ctp); - } - - proc __indcpa_dec (msgp:W8.t Array32.t, ctp:W64.t, skp:W64.t) : W8.t Array32.t = { - - var bp:W16.t Array768.t; - var v:W16.t Array256.t; - var skpv:W16.t Array768.t; - var t:W16.t Array256.t; - var mp:W16.t Array256.t; - bp <- witness; - mp <- witness; - skpv <- witness; - t <- witness; - v <- witness; - bp <@ __polyvec_decompress (ctp); - ctp <- (ctp + (W64.of_int (3 * 320))); - v <@ _poly_decompress (v, ctp); - skpv <@ __polyvec_frombytes (skp); - bp <@ __polyvec_ntt (bp); - t <@ __polyvec_pointwise_acc (skpv, bp); - t <@ _poly_invntt (t); - mp <@ _poly_sub (mp, v, t); - mp <@ __poly_reduce (mp); - (msgp, mp) <@ _i_poly_tomsg (msgp, mp); - return (msgp); - } - - proc __verify (ctp:W64.t, ctpc:W8.t Array1088.t) : W64.t = { - var aux: int; - - var cnd:W64.t; - var i:int; - var t1:W8.t; - var t2:W8.t; - var t64:W64.t; - - cnd <- (W64.of_int 0); - aux <- ((3 * 320) + 128); - i <- 0; - while (i < aux) { - t1 <- (get8_direct (WArray1088.init8 (fun i_0 => (ctpc).[i_0])) i); - t2 <- (loadW8 Glob.mem (W64.to_uint (ctp + (W64.of_int i)))); - t1 <- (t1 `^` t2); - t64 <- (zeroextu64 t1); - cnd <- (cnd `|` t64); - i <- i + 1; - } - cnd <- (- cnd); - cnd <- (cnd `>>` (W8.of_int 63)); - return (cnd); - } - - proc __cmov (dst:W8.t Array32.t, src:W64.t, cnd:W64.t) : W8.t Array32.t = { - var aux: int; - - var i:int; - var t1:W8.t; - var t2:W8.t; - - cnd <- (- cnd); - i <- 0; - while (i < 32) { - t1 <- (get8_direct (WArray32.init8 (fun i_0 => (dst).[i_0])) i); - t2 <- (loadW8 Glob.mem (W64.to_uint (src + (W64.of_int i)))); - t2 <- (t2 `^` t1); - t2 <- (t2 `&` (truncateu8 cnd)); - t1 <- (t1 `^` t2); - dst <- - Array32.init - (WArray32.get8 (WArray32.set8_direct (WArray32.init8 (fun i_0 => (dst).[i_0])) i (t1))); - i <- i + 1; - } - return (dst); - } - - proc __crypto_kem_keypair_jazz (pkp:W64.t, skp:W64.t, - randomnessp:W8.t Array64.t) : unit = { - var aux: int; - - var s_randomnessp:W8.t Array64.t; - var s_pkp:W64.t; - var s_skp:W64.t; - var randomnessp1:W8.t Array32.t; - var i:int; - var t64:W64.t; - var h_pk:W8.t Array32.t; - var randomnessp2:W8.t Array32.t; - h_pk <- witness; - randomnessp1 <- witness; - randomnessp2 <- witness; - s_randomnessp <- witness; - s_randomnessp <- randomnessp; - s_pkp <- pkp; - s_skp <- skp; - randomnessp1 <- (Array32.init (fun i_0 => randomnessp.[0 + i_0])); - __indcpa_keypair (pkp, skp, randomnessp1); - skp <- s_skp; - skp <- (skp + (W64.of_int (3 * 384))); - pkp <- s_pkp; - aux <- (((3 * 384) + 32) %/ 8); - i <- 0; - while (i < aux) { - t64 <- (loadW64 Glob.mem (W64.to_uint (pkp + (W64.of_int (8 * i))))); - Glob.mem <- - storeW64 Glob.mem (W64.to_uint (skp + (W64.of_int 0))) (t64); - skp <- (skp + (W64.of_int 8)); - i <- i + 1; - } - s_skp <- skp; - pkp <- s_pkp; - t64 <- (W64.of_int ((3 * 384) + 32)); - h_pk <@ _isha3_256 (h_pk, pkp, t64); - skp <- s_skp; - i <- 0; - while (i < 4) { - t64 <- (get64 (WArray32.init8 (fun i_0 => (h_pk).[i_0])) i); - Glob.mem <- - storeW64 Glob.mem (W64.to_uint (skp + (W64.of_int 0))) (t64); - skp <- (skp + (W64.of_int 8)); - i <- i + 1; - } - randomnessp <- s_randomnessp; - randomnessp2 <- (Array32.init (fun i_0 => randomnessp.[32 + i_0])); - aux <- (32 %/ 8); - i <- 0; - while (i < aux) { - t64 <- (get64 (WArray32.init8 (fun i_0 => (randomnessp2).[i_0])) i); - Glob.mem <- - storeW64 Glob.mem (W64.to_uint (skp + (W64.of_int 0))) (t64); - skp <- (skp + (W64.of_int 8)); - i <- i + 1; - } - return (); - } - - proc __crypto_kem_enc_jazz (ctp:W64.t, shkp:W64.t, pkp:W64.t, - randomnessp:W8.t Array32.t) : unit = { - var aux: int; - var aux_0: W8.t Array32.t; - - var s_pkp:W64.t; - var s_ctp:W64.t; - var s_shkp:W64.t; - var i:int; - var t64:W64.t; - var kr:W8.t Array64.t; - var buf:W8.t Array64.t; - buf <- witness; - kr <- witness; - s_pkp <- pkp; - s_ctp <- ctp; - s_shkp <- shkp; - aux <- (32 %/ 8); - i <- 0; - while (i < aux) { - t64 <- (get64 (WArray32.init8 (fun i_0 => (randomnessp).[i_0])) i); - kr <- - Array64.init - (WArray64.get8 (WArray64.set64 (WArray64.init8 (fun i_0 => (kr).[i_0])) i (t64))); - i <- i + 1; - } - aux_0 <@ _isha3_256_32 ((Array32.init (fun i_0 => buf.[0 + i_0])), - (Array32.init (fun i_0 => kr.[0 + i_0]))); - buf <- Array64.init - (fun i_0 => if 0 <= i_0 < 0 + 32 then aux_0.[i_0-0] - else buf.[i_0]); - pkp <- s_pkp; - t64 <- (W64.of_int ((3 * 384) + 32)); - aux_0 <@ _isha3_256 ((Array32.init (fun i_0 => buf.[32 + i_0])), pkp, - t64); - buf <- Array64.init - (fun i_0 => if 32 <= i_0 < 32 + 32 then aux_0.[i_0-32] - else buf.[i_0]); - kr <@ _sha3_512_64 (kr, buf); - pkp <- s_pkp; - __indcpa_enc (s_ctp, (Array32.init (fun i_0 => buf.[0 + i_0])), pkp, - (Array32.init (fun i_0 => kr.[32 + i_0]))); - ctp <- s_ctp; - t64 <- (W64.of_int ((3 * 320) + 128)); - aux_0 <@ _isha3_256 ((Array32.init (fun i_0 => kr.[32 + i_0])), ctp, - t64); - kr <- Array64.init - (fun i_0 => if 32 <= i_0 < 32 + 32 then aux_0.[i_0-32] - else kr.[i_0]); - shkp <- s_shkp; - t64 <- (W64.of_int 32); - _shake256_64 (shkp, t64, kr); - return (); - } - - proc __crypto_kem_dec_jazz (shkp:W64.t, ctp:W64.t, skp:W64.t) : unit = { - var aux_0: int; - var aux: W8.t Array32.t; - - var s_shkp:W64.t; - var s_ctp:W64.t; - var buf:W8.t Array64.t; - var hp:W64.t; - var i:int; - var t64:W64.t; - var s_skp:W64.t; - var kr:W8.t Array64.t; - var pkp:W64.t; - var ctpc:W8.t Array1088.t; - var cnd:W64.t; - var zp:W64.t; - buf <- witness; - ctpc <- witness; - kr <- witness; - s_shkp <- shkp; - s_ctp <- ctp; - aux <@ __indcpa_dec ((Array32.init (fun i_0 => buf.[0 + i_0])), ctp, - skp); - buf <- Array64.init - (fun i_0 => if 0 <= i_0 < 0 + 32 then aux.[i_0-0] else buf.[i_0]); - hp <- (skp + (W64.of_int 32)); - hp <- (hp + (W64.of_int (((24 * 3) * 256) `|>>` 3))); - aux_0 <- (32 %/ 8); - i <- 0; - while (i < aux_0) { - t64 <- (loadW64 Glob.mem (W64.to_uint (hp + (W64.of_int (8 * i))))); - buf <- - Array64.init - (WArray64.get8 (WArray64.set64_direct (WArray64.init8 (fun i_0 => (buf).[i_0])) (32 + (8 * i)) (t64))); - i <- i + 1; - } - s_skp <- skp; - kr <@ _sha3_512_64 (kr, buf); - pkp <- s_skp; - pkp <- (pkp + (W64.of_int (((12 * 3) * 256) `|>>` 3))); - ctpc <@ __iindcpa_enc (ctpc, (Array32.init (fun i_0 => buf.[0 + i_0])), - pkp, (Array32.init (fun i_0 => kr.[32 + i_0]))); - ctp <- s_ctp; - cnd <@ __verify (ctp, ctpc); - zp <- s_skp; - zp <- (zp + (W64.of_int 64)); - zp <- (zp + (W64.of_int (((24 * 3) * 256) `|>>` 3))); - aux <@ __cmov ((Array32.init (fun i_0 => kr.[0 + i_0])), zp, cnd); - kr <- Array64.init - (fun i_0 => if 0 <= i_0 < 0 + 32 then aux.[i_0-0] else kr.[i_0]); - t64 <- (W64.of_int ((3 * 320) + 128)); - aux <@ _isha3_256 ((Array32.init (fun i_0 => kr.[32 + i_0])), ctp, t64); - kr <- Array64.init - (fun i_0 => if 32 <= i_0 < 32 + 32 then aux.[i_0-32] - else kr.[i_0]); - shkp <- s_shkp; - t64 <- (W64.of_int 32); - _shake256_64 (shkp, t64, kr); - return (); - } - - proc jade_kem_kyber_kyber768_amd64_ref_keypair (public_key:W64.t, - secret_key:W64.t) : - W64.t = { - - var r:W64.t; - var randomness:W8.t Array64.t; - var randomnessp:W8.t Array64.t; - var _of_:bool; - var _cf_:bool; - var _sf_:bool; - var _zf_:bool; - var _0:bool; - randomness <- witness; - randomnessp <- witness; - public_key <- public_key; - secret_key <- secret_key; - randomnessp <- randomness; - randomnessp <@ SC.randombytes_64 (randomnessp); - __crypto_kem_keypair_jazz (public_key, secret_key, randomnessp); - (_of_, _cf_, _sf_, _0, _zf_, r) <- set0_64 ; - return (r); - } - - proc jade_kem_kyber_kyber768_amd64_ref_enc (ciphertext:W64.t, - shared_secret:W64.t, - public_key:W64.t) : W64.t = { - - var r:W64.t; - var randomness:W8.t Array32.t; - var randomnessp:W8.t Array32.t; - var _of_:bool; - var _cf_:bool; - var _sf_:bool; - var _zf_:bool; - var _0:bool; - randomness <- witness; - randomnessp <- witness; - ciphertext <- ciphertext; - shared_secret <- shared_secret; - public_key <- public_key; - randomnessp <- randomness; - randomnessp <@ SC.randombytes_32 (randomnessp); - __crypto_kem_enc_jazz (ciphertext, shared_secret, public_key, - randomnessp); - (_of_, _cf_, _sf_, _0, _zf_, r) <- set0_64 ; - return (r); - } - - proc jade_kem_kyber_kyber768_amd64_ref_dec (shared_secret:W64.t, - ciphertext:W64.t, - secret_key:W64.t) : W64.t = { - - var r:W64.t; - var _of_:bool; - var _cf_:bool; - var _sf_:bool; - var _zf_:bool; - var _0:bool; - - __crypto_kem_dec_jazz (shared_secret, ciphertext, secret_key); - (_of_, _cf_, _sf_, _0, _zf_, r) <- set0_64 ; - return (r); - } -}. - diff --git a/code/jasmin/ref/fips202.c b/code/jasmin/ref/fips202.c deleted file mode 100644 index d300328b..00000000 --- a/code/jasmin/ref/fips202.c +++ /dev/null @@ -1,549 +0,0 @@ -/* Based on the public domain implementation in - * crypto_hash/keccakc512/simple/ from http://bench.cr.yp.to/supercop.html - * by Ronny Van Keer - * and the public domain "TweetFips202" implementation - * from https://twitter.com/tweetfips202 - * by Gilles Van Assche, Daniel J. Bernstein, and Peter Schwabe */ - -#include -#include -#include "fips202.h" - -#define NROUNDS 24 -#define ROL(a, offset) ((a << offset) ^ (a >> (64-offset))) - -/************************************************* -* Name: load64 -* -* Description: Load 8 bytes into uint64_t in little-endian order -* -* Arguments: - const unsigned char *x: pointer to input byte array -* -* Returns the loaded 64-bit unsigned integer -**************************************************/ -static uint64_t load64(const unsigned char *x) -{ - unsigned long long r = 0, i; - - for (i = 0; i < 8; ++i) { - r |= (unsigned long long)x[i] << 8 * i; - } - return r; -} - -/************************************************* -* Name: store64 -* -* Description: Store a 64-bit integer to a byte array in little-endian order -* -* Arguments: - uint8_t *x: pointer to the output byte array -* - uint64_t u: input 64-bit unsigned integer -**************************************************/ -static void store64(uint8_t *x, uint64_t u) -{ - unsigned int i; - - for(i=0; i<8; ++i) { - x[i] = u; - u >>= 8; - } -} - -/* Keccak round constants */ -static const uint64_t KeccakF_RoundConstants[NROUNDS] = -{ - (uint64_t)0x0000000000000001ULL, - (uint64_t)0x0000000000008082ULL, - (uint64_t)0x800000000000808aULL, - (uint64_t)0x8000000080008000ULL, - (uint64_t)0x000000000000808bULL, - (uint64_t)0x0000000080000001ULL, - (uint64_t)0x8000000080008081ULL, - (uint64_t)0x8000000000008009ULL, - (uint64_t)0x000000000000008aULL, - (uint64_t)0x0000000000000088ULL, - (uint64_t)0x0000000080008009ULL, - (uint64_t)0x000000008000000aULL, - (uint64_t)0x000000008000808bULL, - (uint64_t)0x800000000000008bULL, - (uint64_t)0x8000000000008089ULL, - (uint64_t)0x8000000000008003ULL, - (uint64_t)0x8000000000008002ULL, - (uint64_t)0x8000000000000080ULL, - (uint64_t)0x000000000000800aULL, - (uint64_t)0x800000008000000aULL, - (uint64_t)0x8000000080008081ULL, - (uint64_t)0x8000000000008080ULL, - (uint64_t)0x0000000080000001ULL, - (uint64_t)0x8000000080008008ULL -}; - -/************************************************* -* Name: KeccakF1600_StatePermute -* -* Description: The Keccak F1600 Permutation -* -* Arguments: - uint64_t * state: pointer to in/output Keccak state -**************************************************/ -static void KeccakF1600_StatePermute(uint64_t * state) -{ - int round; - - uint64_t Aba, Abe, Abi, Abo, Abu; - uint64_t Aga, Age, Agi, Ago, Agu; - uint64_t Aka, Ake, Aki, Ako, Aku; - uint64_t Ama, Ame, Ami, Amo, Amu; - uint64_t Asa, Ase, Asi, Aso, Asu; - uint64_t BCa, BCe, BCi, BCo, BCu; - uint64_t Da, De, Di, Do, Du; - uint64_t Eba, Ebe, Ebi, Ebo, Ebu; - uint64_t Ega, Ege, Egi, Ego, Egu; - uint64_t Eka, Eke, Eki, Eko, Eku; - uint64_t Ema, Eme, Emi, Emo, Emu; - uint64_t Esa, Ese, Esi, Eso, Esu; - - //copyFromState(A, state) - Aba = state[ 0]; - Abe = state[ 1]; - Abi = state[ 2]; - Abo = state[ 3]; - Abu = state[ 4]; - Aga = state[ 5]; - Age = state[ 6]; - Agi = state[ 7]; - Ago = state[ 8]; - Agu = state[ 9]; - Aka = state[10]; - Ake = state[11]; - Aki = state[12]; - Ako = state[13]; - Aku = state[14]; - Ama = state[15]; - Ame = state[16]; - Ami = state[17]; - Amo = state[18]; - Amu = state[19]; - Asa = state[20]; - Ase = state[21]; - Asi = state[22]; - Aso = state[23]; - Asu = state[24]; - - for( round = 0; round < NROUNDS; round += 2 ) - { - // prepareTheta - BCa = Aba^Aga^Aka^Ama^Asa; - BCe = Abe^Age^Ake^Ame^Ase; - BCi = Abi^Agi^Aki^Ami^Asi; - BCo = Abo^Ago^Ako^Amo^Aso; - BCu = Abu^Agu^Aku^Amu^Asu; - - //thetaRhoPiChiIotaPrepareTheta(round , A, E) - Da = BCu^ROL(BCe, 1); - De = BCa^ROL(BCi, 1); - Di = BCe^ROL(BCo, 1); - Do = BCi^ROL(BCu, 1); - Du = BCo^ROL(BCa, 1); - - Aba ^= Da; - BCa = Aba; - Age ^= De; - BCe = ROL(Age, 44); - Aki ^= Di; - BCi = ROL(Aki, 43); - Amo ^= Do; - BCo = ROL(Amo, 21); - Asu ^= Du; - BCu = ROL(Asu, 14); - Eba = BCa ^((~BCe)& BCi ); - Eba ^= (uint64_t)KeccakF_RoundConstants[round]; - Ebe = BCe ^((~BCi)& BCo ); - Ebi = BCi ^((~BCo)& BCu ); - Ebo = BCo ^((~BCu)& BCa ); - Ebu = BCu ^((~BCa)& BCe ); - - Abo ^= Do; - BCa = ROL(Abo, 28); - Agu ^= Du; - BCe = ROL(Agu, 20); - Aka ^= Da; - BCi = ROL(Aka, 3); - Ame ^= De; - BCo = ROL(Ame, 45); - Asi ^= Di; - BCu = ROL(Asi, 61); - Ega = BCa ^((~BCe)& BCi ); - Ege = BCe ^((~BCi)& BCo ); - Egi = BCi ^((~BCo)& BCu ); - Ego = BCo ^((~BCu)& BCa ); - Egu = BCu ^((~BCa)& BCe ); - - Abe ^= De; - BCa = ROL(Abe, 1); - Agi ^= Di; - BCe = ROL(Agi, 6); - Ako ^= Do; - BCi = ROL(Ako, 25); - Amu ^= Du; - BCo = ROL(Amu, 8); - Asa ^= Da; - BCu = ROL(Asa, 18); - Eka = BCa ^((~BCe)& BCi ); - Eke = BCe ^((~BCi)& BCo ); - Eki = BCi ^((~BCo)& BCu ); - Eko = BCo ^((~BCu)& BCa ); - Eku = BCu ^((~BCa)& BCe ); - - Abu ^= Du; - BCa = ROL(Abu, 27); - Aga ^= Da; - BCe = ROL(Aga, 36); - Ake ^= De; - BCi = ROL(Ake, 10); - Ami ^= Di; - BCo = ROL(Ami, 15); - Aso ^= Do; - BCu = ROL(Aso, 56); - Ema = BCa ^((~BCe)& BCi ); - Eme = BCe ^((~BCi)& BCo ); - Emi = BCi ^((~BCo)& BCu ); - Emo = BCo ^((~BCu)& BCa ); - Emu = BCu ^((~BCa)& BCe ); - - Abi ^= Di; - BCa = ROL(Abi, 62); - Ago ^= Do; - BCe = ROL(Ago, 55); - Aku ^= Du; - BCi = ROL(Aku, 39); - Ama ^= Da; - BCo = ROL(Ama, 41); - Ase ^= De; - BCu = ROL(Ase, 2); - Esa = BCa ^((~BCe)& BCi ); - Ese = BCe ^((~BCi)& BCo ); - Esi = BCi ^((~BCo)& BCu ); - Eso = BCo ^((~BCu)& BCa ); - Esu = BCu ^((~BCa)& BCe ); - - // prepareTheta - BCa = Eba^Ega^Eka^Ema^Esa; - BCe = Ebe^Ege^Eke^Eme^Ese; - BCi = Ebi^Egi^Eki^Emi^Esi; - BCo = Ebo^Ego^Eko^Emo^Eso; - BCu = Ebu^Egu^Eku^Emu^Esu; - - //thetaRhoPiChiIotaPrepareTheta(round+1, E, A) - Da = BCu^ROL(BCe, 1); - De = BCa^ROL(BCi, 1); - Di = BCe^ROL(BCo, 1); - Do = BCi^ROL(BCu, 1); - Du = BCo^ROL(BCa, 1); - - Eba ^= Da; - BCa = Eba; - Ege ^= De; - BCe = ROL(Ege, 44); - Eki ^= Di; - BCi = ROL(Eki, 43); - Emo ^= Do; - BCo = ROL(Emo, 21); - Esu ^= Du; - BCu = ROL(Esu, 14); - Aba = BCa ^((~BCe)& BCi ); - Aba ^= (uint64_t)KeccakF_RoundConstants[round+1]; - Abe = BCe ^((~BCi)& BCo ); - Abi = BCi ^((~BCo)& BCu ); - Abo = BCo ^((~BCu)& BCa ); - Abu = BCu ^((~BCa)& BCe ); - - Ebo ^= Do; - BCa = ROL(Ebo, 28); - Egu ^= Du; - BCe = ROL(Egu, 20); - Eka ^= Da; - BCi = ROL(Eka, 3); - Eme ^= De; - BCo = ROL(Eme, 45); - Esi ^= Di; - BCu = ROL(Esi, 61); - Aga = BCa ^((~BCe)& BCi ); - Age = BCe ^((~BCi)& BCo ); - Agi = BCi ^((~BCo)& BCu ); - Ago = BCo ^((~BCu)& BCa ); - Agu = BCu ^((~BCa)& BCe ); - - Ebe ^= De; - BCa = ROL(Ebe, 1); - Egi ^= Di; - BCe = ROL(Egi, 6); - Eko ^= Do; - BCi = ROL(Eko, 25); - Emu ^= Du; - BCo = ROL(Emu, 8); - Esa ^= Da; - BCu = ROL(Esa, 18); - Aka = BCa ^((~BCe)& BCi ); - Ake = BCe ^((~BCi)& BCo ); - Aki = BCi ^((~BCo)& BCu ); - Ako = BCo ^((~BCu)& BCa ); - Aku = BCu ^((~BCa)& BCe ); - - Ebu ^= Du; - BCa = ROL(Ebu, 27); - Ega ^= Da; - BCe = ROL(Ega, 36); - Eke ^= De; - BCi = ROL(Eke, 10); - Emi ^= Di; - BCo = ROL(Emi, 15); - Eso ^= Do; - BCu = ROL(Eso, 56); - Ama = BCa ^((~BCe)& BCi ); - Ame = BCe ^((~BCi)& BCo ); - Ami = BCi ^((~BCo)& BCu ); - Amo = BCo ^((~BCu)& BCa ); - Amu = BCu ^((~BCa)& BCe ); - - Ebi ^= Di; - BCa = ROL(Ebi, 62); - Ego ^= Do; - BCe = ROL(Ego, 55); - Eku ^= Du; - BCi = ROL(Eku, 39); - Ema ^= Da; - BCo = ROL(Ema, 41); - Ese ^= De; - BCu = ROL(Ese, 2); - Asa = BCa ^((~BCe)& BCi ); - Ase = BCe ^((~BCi)& BCo ); - Asi = BCi ^((~BCo)& BCu ); - Aso = BCo ^((~BCu)& BCa ); - Asu = BCu ^((~BCa)& BCe ); - } - - //copyToState(state, A) - state[ 0] = Aba; - state[ 1] = Abe; - state[ 2] = Abi; - state[ 3] = Abo; - state[ 4] = Abu; - state[ 5] = Aga; - state[ 6] = Age; - state[ 7] = Agi; - state[ 8] = Ago; - state[ 9] = Agu; - state[10] = Aka; - state[11] = Ake; - state[12] = Aki; - state[13] = Ako; - state[14] = Aku; - state[15] = Ama; - state[16] = Ame; - state[17] = Ami; - state[18] = Amo; - state[19] = Amu; - state[20] = Asa; - state[21] = Ase; - state[22] = Asi; - state[23] = Aso; - state[24] = Asu; - - #undef round -} - -#include -#define MIN(a, b) ((a) < (b) ? (a) : (b)) - - -/************************************************* -* Name: keccak_absorb -* -* Description: Absorb step of Keccak; -* non-incremental, starts by zeroeing the state. -* -* Arguments: - uint64_t *s: pointer to (uninitialized) output Keccak state -* - unsigned int r: rate in bytes (e.g., 168 for SHAKE128) -* - const unsigned char *m: pointer to input to be absorbed into s -* - unsigned long long mlen: length of input in bytes -* - unsigned char p: domain-separation byte for different Keccak-derived functions -**************************************************/ -static void keccak_absorb(uint64_t *s, - unsigned int r, - const unsigned char *m, unsigned long long int mlen, - unsigned char p) -{ - unsigned long long i; - unsigned char t[200]; - - // Zero state - for (i = 0; i < 25; ++i) - s[i] = 0; - - while (mlen >= r) - { - for (i = 0; i < r / 8; ++i) - s[i] ^= load64(m + 8 * i); - - KeccakF1600_StatePermute(s); - mlen -= r; - m += r; - } - - for (i = 0; i < r; ++i) - t[i] = 0; - for (i = 0; i < mlen; ++i) - t[i] = m[i]; - t[i] = p; - t[r - 1] |= 128; - for (i = 0; i < r / 8; ++i) - s[i] ^= load64(t + 8 * i); -} - - -/************************************************* -* Name: keccak_squeezeblocks -* -* Description: Squeeze step of Keccak. Squeezes full blocks of r bytes each. -* Modifies the state. Can be called multiple times to keep squeezing, -* i.e., is incremental. -* -* Arguments: - unsigned char *h: pointer to output blocks -* - unsigned long long int nblocks: number of blocks to be squeezed (written to h) -* - uint64_t *s: pointer to in/output Keccak state -* - unsigned int r: rate in bytes (e.g., 168 for SHAKE128) -**************************************************/ -static void keccak_squeezeblocks(unsigned char *h, unsigned long long int nblocks, - uint64_t *s, - unsigned int r) -{ - unsigned int i; - while(nblocks > 0) - { - KeccakF1600_StatePermute(s); - for(i=0;i<(r>>3);i++) - { - store64(h+8*i, s[i]); - } - h += r; - nblocks--; - } -} - - -/************************************************* -* Name: shake128_absorb -* -* Description: Absorb step of the SHAKE128 XOF. -* non-incremental, starts by zeroeing the state. -* -* Arguments: - uint64_t *s: pointer to (uninitialized) output Keccak state -* - const unsigned char *input: pointer to input to be absorbed into s -* - unsigned long long inputByteLen: length of input in bytes -**************************************************/ -void shake128_absorb(uint64_t *s, const unsigned char *input, unsigned int inputByteLen) -{ - keccak_absorb(s, SHAKE128_RATE, input, inputByteLen, 0x1F); -} - -/************************************************* -* Name: shake128_squeezeblocks -* -* Description: Squeeze step of SHAKE128 XOF. Squeezes full blocks of SHAKE128_RATE bytes each. -* Modifies the state. Can be called multiple times to keep squeezing, -* i.e., is incremental. -* -* Arguments: - unsigned char *output: pointer to output blocks -* - unsigned long long nblocks: number of blocks to be squeezed (written to output) -* - uint64_t *s: pointer to in/output Keccak state -**************************************************/ -void shake128_squeezeblocks(unsigned char *output, unsigned long long nblocks, uint64_t *s) -{ - keccak_squeezeblocks(output, nblocks, s, SHAKE128_RATE); -} - -/************************************************* -* Name: shake256 -* -* Description: SHAKE256 XOF with non-incremental API -* -* Arguments: - unsigned char *output: pointer to output -* - unsigned long long outlen: requested output length in bytes - - const unsigned char *input: pointer to input - - unsigned long long inlen: length of input in bytes -**************************************************/ -void shake256(unsigned char *output, unsigned long long outlen, - const unsigned char *input, unsigned long long inlen) -{ - uint64_t s[25]; - unsigned char t[SHAKE256_RATE]; - unsigned long long nblocks = outlen/SHAKE256_RATE; - size_t i; - - /* Absorb input */ - keccak_absorb(s, SHAKE256_RATE, input, inlen, 0x1F); - - /* Squeeze output */ - keccak_squeezeblocks(output, nblocks, s, SHAKE256_RATE); - - output+=nblocks*SHAKE256_RATE; - outlen-=nblocks*SHAKE256_RATE; - - if(outlen) - { - keccak_squeezeblocks(t, 1, s, SHAKE256_RATE); - for(i=0;i - -#define SHAKE128_RATE 168 -#define SHAKE256_RATE 136 -#define SHA3_256_RATE 136 -#define SHA3_512_RATE 72 - -void shake128_absorb(uint64_t *s, const unsigned char *input, unsigned int inputByteLen); -void shake128_squeezeblocks(unsigned char *output, unsigned long long nblocks, uint64_t *s); - -void shake256(unsigned char *output, unsigned long long outlen, const unsigned char *input, unsigned long long inlen); - -void sha3_256(unsigned char *output, const unsigned char *input, unsigned long long inlen); -void sha3_512(unsigned char *output, const unsigned char *input, unsigned long long inlen); - - - -void shake256_128_33_jazz(unsigned char *output, const unsigned char *input); -void sha3512_32_jazz(unsigned char *output, const unsigned char *input); - -void shake128_absorb34_jazz(uint64_t *s, const unsigned char *input); -void shake128_squeezeblock_jazz(unsigned char *output, uint64_t *s); - - -#endif diff --git a/code/jasmin/ref/fips202.jinc b/code/jasmin/ref/fips202.jinc deleted file mode 100644 index bdc42019..00000000 --- a/code/jasmin/ref/fips202.jinc +++ /dev/null @@ -1,673 +0,0 @@ -param int SHAKE128_RATE = 168; -param int SHAKE256_RATE = 136; -param int SHA3_256_RATE = 136; -param int SHA3_512_RATE = 72; - -param int SHAKE128_RATE = 168; -param int SHAKE256_RATE = 136; -param int SHA3_512_RATE = 72; - -inline -fn __index(inline int x, inline int y) -> inline int { - inline int r; - r = (x % 5) + 5 * (y % 5); - return r; -} - - -inline -fn __ROL64(reg u64 x, inline int c) -> reg u64 { - reg u64 y; - _, _, y = #ROL_64(x, c); - return y; -} - -inline -fn __theta(reg ptr u64[25] a) -> reg ptr u64[25] { - inline int x, y; - reg u64[5] c, d; - - for x = 0 to 5 { - c[x] = 0; - for y = 0 to 5 { - c[x] ^= a[x + 5 * y]; - } - } - - for x = 0 to 5 { - /* d[x] = __ROL64(c[(x + 1) % 5], 1); */ - /* extraction fails */ - - /* _, _, d[x] = #ROL_64(c[(x + 1) % 5], 1);*/ - /* d[x] ^= c[(x + 4) % 5];*/ - /* does not compile */ - - d[x] = c[(x + 1) % 5]; - _, _, d[x] = #ROL_64(d[x], 1); - d[x] ^= c[(x + 4) % 5]; - } - - for x = 0 to 5 { - for y = 0 to 5 { - a[x + 5 * y] ^= d[x]; - } - } - - return a; -} - - -inline -fn __keccakRhoOffsets(inline int i) -> inline int { - inline int r, x, y, z, t; - - r = 0; - x = 1; - y = 0; - for t = 0 to 24 { - if (i == x + 5 * y) { - r = ((t + 1) * (t + 2) / 2) % 64; - } - z = (2 * x + 3 * y) % 5; - x = y; - y = z; - } - - return r; -} - - -inline -fn __rho(reg ptr u64[25] a) -> reg ptr u64[25] { - inline int x, y, i, z; - - for x = 0 to 5 { - for y = 0 to 5 { - i = __index(x, y); - z = __keccakRhoOffsets(i); - _, _, a[i] = #ROL_64(a[i], z); - } - } - - return a; -} - - -inline -fn __pi(reg ptr u64[25] a) -> reg ptr u64[25] { - stack u64[25] b; - reg u64 t; - inline int x, y, i; - for i = 0 to 25 { t = a[i]; b[i] = t; } - - for x = 0 to 5 { - for y = 0 to 5 { - t = b[x + 5 * y]; - i = __index(y, 2 * x + 3 * y); - a[i] = t; - } - } - return a; -} - - -inline -fn __chi(reg ptr u64[25] a) -> reg ptr u64[25] { - inline int x, y, i; - reg u64[5] c; - for y = 0 to 5 { - for x = 0 to 5 { - i = __index(x + 1, y); - c[x] = a[i]; - c[x] = !c[x]; - i = __index(x + 2, y); - c[x] &= a[i]; - i = __index(x, y); - c[x] ^= a[i]; - } - for x = 0 to 5 { - a[x + 5 * y] = c[x]; - } - } - return a; -} - - -inline -fn __iota(reg ptr u64[25] a, reg u64 c) -> reg ptr u64[25] { - a[0] ^= c; - return a; -} - -u64[24] roundconstants = {0x0000000000000001, 0x0000000000008082, 0x800000000000808a, 0x8000000080008000, - 0x000000000000808b, 0x0000000080000001, 0x8000000080008081, 0x8000000000008009, - 0x000000000000008a, 0x0000000000000088, 0x0000000080008009, 0x000000008000000a, - 0x000000008000808b, 0x800000000000008b, 0x8000000000008089, 0x8000000000008003, - 0x8000000000008002, 0x8000000000000080, 0x000000000000800a, 0x800000008000000a, - 0x8000000080008081, 0x8000000000008080, 0x0000000080000001, 0x8000000080008008}; - - -fn __keccakf1600_ref(reg ptr u64[25] state) -> reg ptr u64[25] { - inline int round; - reg ptr u64[24] constptr; - - reg u64 rctr; - - constptr = roundconstants; - rctr = 0; - - while (rctr < 192) { - state = __theta(state); - state = __rho(state); - state = __pi(state); - state = __chi(state); - constptr = roundconstants; - state = __iota(state, constptr.[(int)rctr]); - rctr += 8; - } - - return state; -} - - -inline -fn __st0(reg ptr u64[25] state) -> reg ptr u64[25] -{ - inline int i; - - for i = 0 to 25 { - state[i] = 0; - } - - return state; -} - - -inline -fn __add_full_block( - stack u64[25] state, - reg u64 in, - reg u64 inlen, - reg u64 r8 -) -> stack u64[25], reg u64, reg u64 -{ - reg u64 i t r64; - - r64 = r8; - r64 >>= 3; - i = 0; - while (i < r64) - { - t = [in + 8 * i]; - state[(int) i] ^= t; - i = i + 1; - } - - in += r8; - inlen -= r8; - - return state, in, inlen; -} - - -inline -fn __add_final_block( - stack u64[25] state, - reg u64 in, - reg u64 inlen, - reg u8 trail_byte, - reg u64 r8 -) -> stack u64[25] -{ - reg u64 i, t, inlen8; - reg u8 c; - - inlen8 = inlen; - inlen8 >>= 3; - i = 0; - while ( i < inlen8) - { - t = [in + 8*i]; - state[(int) i] ^= t; - i = i + 1; - } - - i <<= 3; - while (i < inlen) - { - c = (u8)[in + i]; - state[u8 (int) i] ^= c; - i = i + 1; - } - - state[u8 (int) i] ^= trail_byte; - - i = r8; - i -= 1; - state[u8 (int) i] ^= 0x80; - - return state; -} - - -inline -fn __xtr_full_block( - stack u64[25] state, - reg u64 out, - reg u64 outlen, - reg u64 rate -) -> reg u64, reg u64 -{ - reg u64 i t rate64; - - rate64 = rate; - rate64 >>= 3; - i = 0; - while (i < rate64) - { - t = state[(int) i]; - [out + 8 * i] = t; - i = i + 1; - } - - out += rate; - outlen -= rate; - - return out, outlen; -} - - -inline -fn ____xtr_bytes( - stack u64[25] state, - reg u64 out, - reg u64 outlen -) -{ - reg u64 i t outlen8; - reg u8 c; - - outlen8 = outlen; - outlen8 >>= 3; - i = 0; - while (i < outlen8 ) - { - t = state[(int) i]; - [out + 8 * i] = t; - i = i + 1; - } - i <<= 3; - - while (i < outlen) - { - c = state[u8 (int) i]; - (u8)[out + i] = c; - i = i + 1; - } -} - - -inline -fn ____keccak1600_ref( - stack u64 s_out s_outlen, - reg u64 in inlen, - stack u64 s_trail_byte, - reg u64 rate -) -{ - stack u64[25] state; - stack u64 s_in, s_inlen, s_rate; - reg u64 out, outlen, t; - reg u8 trail_byte; - - state = __st0(state); - - while ( inlen >= rate ) - { - state, in, inlen = __add_full_block(state, in, inlen, rate); - - s_in = in; - s_inlen = inlen; - s_rate = rate; - - state = __keccakf1600_ref(state); - - inlen = s_inlen; - in = s_in; - rate = s_rate; - } - - t = s_trail_byte; - trail_byte = (8u) t; - state = __add_final_block(state, in, inlen, trail_byte, rate); - - outlen = s_outlen; - - while ( outlen > rate ) - { - s_outlen = outlen; - s_rate = rate; - - state = __keccakf1600_ref(state); - - out = s_out; - outlen = s_outlen; - rate = s_rate; - - out, outlen = __xtr_full_block(state, out, outlen, rate); - s_outlen = outlen; - s_out = out; - } - - state = __keccakf1600_ref(state); - out = s_out; - outlen = s_outlen; - - ____xtr_bytes(state, out, outlen); -} - - -inline -fn __keccak1600_ref(reg u64 out outlen in inlen, stack u64[2] config) -{ - stack u64 s_trail_byte; - stack u64 s_out s_outlen; - reg u64 trail_byte; - reg u64 rate; - - s_out = out; - s_outlen = outlen; - - trail_byte = config[0]; - s_trail_byte = trail_byte; - - rate = config[1]; - - ____keccak1600_ref(s_out, s_outlen, in, inlen, s_trail_byte, rate); -} - -inline -fn __shake256(reg u64 out outlen in inlen) -{ - reg u64 ds; - reg u64 rate; - stack u64[2] config; - ds = 0x1f; - rate = SHAKE256_RATE; - config[0] = ds; - config[1] = rate; - __keccak1600_ref(out, outlen, in, inlen, config); -} - - -fn _shake256_128_33(reg ptr u8[128] out, reg const ptr u8[33] in) -> stack u8[128] -{ - stack u64[25] state; - reg u8 c; - inline int i; - - stack ptr u8[128] sout; - - sout = out; - - state = __st0(state); - - for i = 0 to 33 { - c = in[i]; - state[u8 i] ^= c; - } - state[u8 33] ^= 0x1f; - state[u8 SHAKE256_RATE-1] ^= 0x80; - - state = __keccakf1600_ref(state); - - out = sout; - - for i = 0 to 128 { - c = state[u8 (int) i]; - out[i] = c; - } - return out; -} - -fn _sha3512_32(reg ptr u8[64] out, reg const ptr u8[32] in) -> stack u8[64] -{ - stack u64[25] state; - reg u8 c; - inline int i; - - state = __st0(state); - - for i = 0 to 32 { - c = in[i]; - state[u8 i] ^= c; - } - state[u8 32] ^= 0x06; - state[u8 SHA3_512_RATE-1] ^= 0x80; - - state = __keccakf1600_ref(state); - - for i = 0 to 64 { - c = state[u8 (int) i]; - out[i] = c; - } - return out; -} - - -fn _shake128_absorb34(reg ptr u64[25] state, reg const ptr u8[34] in) -> reg ptr u64[25] -{ - reg u8 c; - inline int i; - - state = __st0(state); - - for i = 0 to 34 { - c = in[i]; - state[u8 i] ^= c; - } - state[u8 34] ^= 0x1f; - state[u8 SHAKE128_RATE-1] ^= 0x80; - - return state; -} - - -fn _shake128_squeezeblock(reg ptr u64[25] state, reg ptr u8[SHAKE128_RATE] out) -> reg ptr u64[25], reg ptr u8[SHAKE128_RATE] -{ - reg u8 c; - inline int i; - - state = __keccakf1600_ref(state); - - for i = 0 to SHAKE128_RATE { - c = state[u8 (int) i]; - out[i] = c; - } - return state, out; -} - -#[returnaddress="stack"] -fn _isha3_256(reg ptr u8[32] out, reg u64 in inlen) -> reg ptr u8[32] -{ - stack u64[25] state; - stack ptr u8[32] s_out; - stack u64 s_in s_ilen s_r8; - reg u64 ilen r8 t64; - reg u8 t8; - inline int i; - - s_out = out; - - state = __st0(state); - - r8 = SHA3_256_RATE; - ilen = inlen; - - while(ilen >= r8) - { - state, in, ilen = __add_full_block(state, in, ilen, r8); - - s_in = in; - s_ilen = ilen; - s_r8 = r8; - - state = __keccakf1600_ref(state); - - in = s_in; - ilen = s_ilen; - r8 = s_r8; - } - - t8 = 0x06; - state = __add_final_block(state, in, ilen, t8, r8); - - state = __keccakf1600_ref(state); - - out = s_out; - - for i=0 to 4 - { - t64 = state[i]; - out[u64 i] = t64; - } - - return out; -} - -#[returnaddress="stack"] -fn _isha3_256_32(reg ptr u8[32] out, reg ptr u8[KYBER_SYMBYTES] in) -> reg ptr u8[32] -{ - stack u64[25] state; - stack ptr u8[32] s_out; - reg u64 t64; - inline int i; - - s_out = out; - - state = __st0(state); - - for i=0 to KYBER_SYMBYTES/8 - { - t64 = in[u64 i]; - state[u64 i] = t64; - } - - state[u8 KYBER_SYMBYTES] ^= 0x06; - state[u8 SHA3_256_RATE - 1] = 0x80; - - state = __keccakf1600_ref(state); - - out = s_out; - - for i=0 to 4 - { - t64 = state[i]; - out[u64 i] = t64; - } - - return out; -} - - -#[returnaddress="stack"] -fn _sha3_512_64(reg ptr u8[64] out, reg const ptr u8[64] in) -> stack u8[64] -{ - stack u64[25] state; - stack ptr u8[64] out_s; - reg u64 t64; - inline int i; - - state = __st0(state); - - for i = 0 to 8 - { - t64 = in[u64 i]; - state[i] ^= t64; - } - - state[u8 64] ^= 0x06; - state[u8 SHA3_512_RATE - 1] ^= 0x80; - - out_s = out; - - state = __keccakf1600_ref(state); - - out = out_s; - - for i = 0 to 8 - { - t64 = state[i]; - out[u64 i] = t64; - } - - return out; -} - -#[returnaddress="stack"] -fn _shake256_64(reg u64 out outlen, reg const ptr u8[64] in) -{ - reg u64 t64 j; - reg u8 c; - stack u64[25] state; - stack u64 s_out s_outlen; - inline int i; - - s_out = out; - s_outlen = outlen; - - state = __st0(state); - - for i = 0 to 8 { - t64 = in[u64 i]; - state[u64 i] ^= t64; - } - - state[u8 64] ^= 0x1f; - state[u8 SHAKE256_RATE-1] ^= 0x80; - - state = __keccakf1600_ref(state); - - outlen = s_outlen; - out = s_out; - - while(outlen > SHAKE256_RATE) - { - for i = 0 to SHAKE256_RATE/8 - { - t64 = state[u64 i]; - (u64)[out + 8*i] = t64; - } - - out += SHAKE256_RATE; - outlen -= SHAKE256_RATE; - - s_out = out; - s_outlen = outlen; - - state = __keccakf1600_ref(state); - - outlen = s_outlen; - out = s_out; - } - - s_outlen = outlen; - outlen >>= 3; - j = 0; - while(j < outlen) - { - t64 = state[(int) j]; - (u64)[out + 8 * j] = t64; - j = j + 1; - } - - j <<= 3; - outlen = s_outlen; - - while (j < outlen) - { - c = state[u8 (int) j]; - (u8)[out + j] = c; - j = j + 1; - } -} diff --git a/code/jasmin/ref/gen_matrix.jazz b/code/jasmin/ref/gen_matrix.jazz deleted file mode 100644 index 2f2c9785..00000000 --- a/code/jasmin/ref/gen_matrix.jazz +++ /dev/null @@ -1,26 +0,0 @@ -require "gen_matrix.jinc" - -export fn gen_matrix_jazz(reg u64 ap, reg u64 seedp, reg u64 transposed) -{ - stack u16[KYBER_K * KYBER_VECN] a; - stack u8[KYBER_SYMBYTES] seed; - reg u8 c; - reg u16 t; - inline int i; - reg u64 pp; - - for i = 0 to KYBER_SYMBYTES - { - c = (u8)[seedp + i]; - seed[i] = c; - } - - a = __gen_matrix(seed, transposed); - - pp = ap; - for i = 0 to KYBER_K*KYBER_VECN - { - t = a[i]; - (u16)[pp + 2*i] = t; - } -} diff --git a/code/jasmin/ref/gen_matrix.jinc b/code/jasmin/ref/gen_matrix.jinc deleted file mode 100644 index 8208e247..00000000 --- a/code/jasmin/ref/gen_matrix.jinc +++ /dev/null @@ -1,126 +0,0 @@ -require "fips202.jinc" -require "params.jinc" - -inline -fn __rej_uniform(stack u16[KYBER_N] rp, reg u64 offset, stack u8[SHAKE128_RATE] buf) -> reg u64, stack u16[KYBER_N] -{ - reg u16 val1 val2; - reg u16 t; - reg u64 pos ctr; - reg u64 cnd0 cnd1 exit; - - - ctr = offset; - pos = 0; - exit = 0; - - while(exit == 0) - { - val1 = (16u)buf[(int)pos]; - pos += 1; - t = (16u)buf[(int)pos]; - val2 = t; - val2 >>= 4; - t &= 0x0F; - t <<= 8; - val1 |= t; - pos += 1; - - t = (16u)buf[(int)pos]; - t <<= 4; - val2 |= t; - pos += 1; - - if(val1 < KYBER_Q) - { - rp[(int)ctr] = val1; - ctr += 1; - } - - if(val2 < KYBER_Q) - { - if(ctr < KYBER_N) - { - rp[(int)ctr] = val2; - ctr += 1; - } - } - - // Check if we should exit the loop - cnd0 = KYBER_N; - cnd0 -= ctr; - cnd0 -= 1; - cnd1 = SHAKE128_RATE; - cnd1 -= pos; - cnd1 -= 3; //TODO: (potentially) wasting 2 'good' bytes - exit = cnd0 | cnd1; - exit >>= 63; - } - - return ctr, rp; -} - -inline -fn __gen_matrix(stack u8[KYBER_SYMBYTES] seed, reg u64 transposed) -> stack u16[KYBER_K*KYBER_VECN] -{ - stack u8[34] extseed; - stack u8[SHAKE128_RATE] buf; - stack u64[25] state; - stack u16[KYBER_N] poly; - stack u16[KYBER_K*KYBER_VECN] r; - - reg u8 c; - reg u16 t; - reg u64 ctr k l; - stack u64 sctr; - stack u64 stransposed; - inline int j i; - - stransposed = transposed; - - for j = 0 to KYBER_SYMBYTES - { - c = seed[j]; - extseed[j] = c; - } - - for i=0 to KYBER_K - { - for j = 0 to KYBER_K - { - transposed = stransposed; - if(transposed == 0) - { - extseed[KYBER_SYMBYTES] = j; - extseed[KYBER_SYMBYTES+1] = i; - } - else - { - extseed[KYBER_SYMBYTES] = i; - extseed[KYBER_SYMBYTES+1] = j; - } - - state = _shake128_absorb34(state, extseed); - - ctr = 0; - while (ctr < KYBER_N) - { - sctr = ctr; - state, buf = _shake128_squeezeblock(state, buf); - ctr = sctr; - ctr, poly = __rej_uniform(poly, ctr, buf); - } - - k = 0; - l = i * KYBER_VECN + j * KYBER_N; - while (k < KYBER_N) - { - t = poly[(int) k]; - r[(int) l] = t; - k += 1; - l += 1; - } - } - } - return r; -} diff --git a/code/jasmin/ref/indcpa.c b/code/jasmin/ref/indcpa.c deleted file mode 100644 index d73f8e5f..00000000 --- a/code/jasmin/ref/indcpa.c +++ /dev/null @@ -1,321 +0,0 @@ -#include -#include "indcpa.h" -#include "poly.h" -#include "polyvec.h" -#include "ntt.h" -#include "symmetric.h" - -/************************************************* -* Name: pack_pk -* -* Description: Serialize the public key as concatenation of the -* serialized vector of polynomials pk -* and the public seed used to generate the matrix A. -* -* Arguments: unsigned char *r: pointer to the output serialized public key -* const poly *pk: pointer to the input public-key polynomial -* const unsigned char *seed: pointer to the input public seed -**************************************************/ -static void pack_pk(unsigned char *r, polyvec *pk, const unsigned char *seed) -{ - int i; - polyvec_tobytes(r, pk); - for(i=0;i> 4) | ((uint16_t)buf[pos+1] << 4)); - pos += 2; - - if(val1 < KYBER_Q) - { - r[ctr++] = (int16_t)val1; - } - - if(val2 < KYBER_Q && ctr < len) { - r[ctr++] = (int16_t)val2; - } - } - - return ctr; -} - -#define gen_a(A,B) gen_matrix(A,B,0) -#define gen_at(A,B) gen_matrix(A,B,1) - -/************************************************* -* Name: gen_matrix -* -* Description: Deterministically generate matrix A (or the transpose of A) -* from a seed. Entries of the matrix are polynomials that look -* uniformly random. Performs rejection sampling on output of -* a XOF -* -* Arguments: - polyvec *a: pointer to ouptput matrix A -* - const unsigned char *seed: pointer to input seed -* - int transposed: boolean deciding whether A or A^T is generated -**************************************************/ -static void gen_matrix(polyvec *a, const unsigned char *seed, int transposed) // Not static for benchmarking -{ - unsigned int ctr, i, j; - const unsigned int maxnblocks=(530+XOF_BLOCKBYTES)/XOF_BLOCKBYTES; /* 530 is expected number of required bytes */ - unsigned char buf[XOF_BLOCKBYTES*maxnblocks+1]; - xof_state state; - - for(i=0;i - -void indcpa_keypair(unsigned char *pk, - unsigned char *sk, - const unsigned char *randomness); - -void indcpa_enc(unsigned char *c, - const unsigned char *m, - const unsigned char *pk, - const unsigned char *coins); - -void indcpa_dec(unsigned char *m, - const unsigned char *c, - const unsigned char *sk); - - - - -void indcpa_keypair_jazz(unsigned char *pk, - unsigned char *sk, - const unsigned char *randomness); - -void indcpa_enc_jazz(unsigned char *c, - const unsigned char *m, - const unsigned char *pk, - const unsigned char *coins); - -void indcpa_dec_jazz(unsigned char *m, - const unsigned char *c, - const unsigned char *sk); - - -#endif diff --git a/code/jasmin/ref/indcpa.jinc b/code/jasmin/ref/indcpa.jinc deleted file mode 100644 index 4b0595c2..00000000 --- a/code/jasmin/ref/indcpa.jinc +++ /dev/null @@ -1,248 +0,0 @@ -require "params.jinc" -require "poly.jinc" -require "polyvec.jinc" -require "gen_matrix.jinc" - -inline -fn __indcpa_keypair(reg u64 pkp, reg u64 skp, reg ptr u8[KYBER_SYMBYTES] randomnessp) -{ - stack u16[KYBER_K * KYBER_VECN] a; - stack u16[KYBER_VECN] e pkpv skpv; - stack u8[64] buf; - stack u8[KYBER_SYMBYTES] publicseed noiseseed; - stack u8[32] inbuf; - reg u64 t64; - reg u64 zero; - reg u8 nonce; - inline int i; - - stack u64 spkp; - stack u64 sskp; - - spkp = pkp; - sskp = skp; - - for i=0 to KYBER_SYMBYTES/8 - { - t64 = randomnessp[u64 i]; - inbuf[u64 i] = t64; - } - - buf = _sha3512_32(buf, inbuf); - - for i=0 to KYBER_SYMBYTES/8 - { - t64 = buf[u64 i]; - publicseed[u64 i] = t64; - t64 = buf[u64 i + KYBER_SYMBYTES/8]; - noiseseed[u64 i] = t64; - } - - zero = 0; - a = __gen_matrix(publicseed, zero); - - nonce = 0; - skpv[0:KYBER_N] = _poly_getnoise(skpv[0:KYBER_N], noiseseed, nonce); - nonce = 1; - skpv[KYBER_N:KYBER_N] = _poly_getnoise(skpv[KYBER_N:KYBER_N], noiseseed, nonce); - nonce = 2; - skpv[2*KYBER_N:KYBER_N] = _poly_getnoise(skpv[2*KYBER_N:KYBER_N], noiseseed, nonce); - - nonce = 3; - e[0:KYBER_N] = _poly_getnoise(e[0:KYBER_N], noiseseed, nonce); - nonce = 4; - e[KYBER_N:KYBER_N] = _poly_getnoise(e[KYBER_N:KYBER_N], noiseseed, nonce); - nonce = 5; - e[2*KYBER_N:KYBER_N] = _poly_getnoise(e[2*KYBER_N:KYBER_N], noiseseed, nonce); - - skpv = __polyvec_ntt(skpv); - e = __polyvec_ntt(e); - - pkpv[0:KYBER_N] = __polyvec_pointwise_acc(a[0:KYBER_VECN], skpv); - pkpv[0:KYBER_N] = _poly_frommont(pkpv[0:KYBER_N]); - pkpv[KYBER_N:KYBER_N] = __polyvec_pointwise_acc(a[KYBER_VECN:KYBER_VECN], skpv); - pkpv[KYBER_N:KYBER_N] = _poly_frommont(pkpv[KYBER_N:KYBER_N]); - pkpv[2*KYBER_N:KYBER_N] = __polyvec_pointwise_acc(a[2*KYBER_VECN:KYBER_VECN], skpv); - pkpv[2*KYBER_N:KYBER_N] = _poly_frommont(pkpv[2*KYBER_N:KYBER_N]); - - pkpv = __polyvec_add2(pkpv, e); - pkpv = __polyvec_reduce(pkpv); - - pkp = spkp; - skp = sskp; - - __polyvec_tobytes(skp, skpv); - __polyvec_tobytes(pkp, pkpv); - - pkp += KYBER_POLYVECBYTES; - for i=0 to KYBER_SYMBYTES/8 - { - t64 = publicseed[u64 i]; - (u64)[pkp] = t64; - pkp += 8; - } -} - -inline -fn __indcpa_enc(stack u64 sctp, reg ptr u8[32] msgp, reg u64 pkp, reg ptr u8[KYBER_SYMBYTES] noiseseed) -{ - stack u16[KYBER_VECN] pkpv sp ep bp; - stack u16[KYBER_K*KYBER_VECN] aat; - stack u16[KYBER_N] k poly epp v poly0 poly1 poly2; - stack u8[KYBER_SYMBYTES] publicseed; - reg u64 i j t64; - reg u64 ctp; - reg u16 t; - reg u8 nonce; - - pkpv = __polyvec_frombytes(pkp); - - i = 0; - pkp += KYBER_POLYVECBYTES; - while (i < KYBER_SYMBYTES/8) - { - t64 = (u64)[pkp]; - publicseed.[u64 8 * (int)i] = t64; - pkp += 8; - i += 1; - } - - k = _i_poly_frommsg(k, msgp); - - aat = __gen_matrix(publicseed, 1); - - nonce = 0; - sp[0:KYBER_N] = _poly_getnoise(sp[0:KYBER_N], noiseseed, nonce); - nonce = 1; - sp[KYBER_N:KYBER_N] = _poly_getnoise(sp[KYBER_N:KYBER_N], noiseseed, nonce); - nonce = 2; - sp[2*KYBER_N:KYBER_N] = _poly_getnoise(sp[2*KYBER_N:KYBER_N], noiseseed, nonce); - - nonce = 3; - ep[0:KYBER_N] = _poly_getnoise(ep[0:KYBER_N], noiseseed, nonce); - nonce = 4; - ep[KYBER_N:KYBER_N] = _poly_getnoise(ep[KYBER_N:KYBER_N], noiseseed, nonce); - nonce = 5; - ep[2*KYBER_N:KYBER_N] = _poly_getnoise(ep[2*KYBER_N:KYBER_N], noiseseed, nonce); - - nonce = 6; - epp = _poly_getnoise(epp, noiseseed, nonce); - - sp = __polyvec_ntt(sp); - - bp[0:KYBER_N] = __polyvec_pointwise_acc(aat[0:KYBER_VECN], sp); - bp[KYBER_N:KYBER_N]= __polyvec_pointwise_acc(aat[KYBER_VECN:KYBER_VECN], sp); - bp[2*KYBER_N:KYBER_N] = __polyvec_pointwise_acc(aat[2*KYBER_VECN:KYBER_VECN], sp); - - v = __polyvec_pointwise_acc(pkpv, sp); - - bp = __polyvec_invntt(bp); - v = _poly_invntt(v); - - bp = __polyvec_add2(bp, ep); - v = _poly_add2(v, epp); - v = _poly_add2(v, k); - bp = __polyvec_reduce(bp); - v = __poly_reduce(v); - - ctp = sctp; - __polyvec_compress(ctp, bp); - ctp += KYBER_POLYVECCOMPRESSEDBYTES; - v = _poly_compress(ctp, v); -} - -inline -fn __iindcpa_enc(reg ptr u8[KYBER_CT_LEN] ctp, reg ptr u8[32] msgp, reg u64 pkp, reg ptr u8[KYBER_SYMBYTES] noiseseed) -> reg ptr u8[KYBER_CT_LEN] -{ - stack u16[KYBER_VECN] pkpv sp ep bp; - stack u16[KYBER_K*KYBER_VECN] aat; - stack u16[KYBER_N] k poly epp v poly0 poly1 poly2; - stack u8[KYBER_SYMBYTES] publicseed; - reg u64 i j t64; - reg u16 t; - reg u8 nonce; - stack ptr u8[KYBER_CT_LEN] sctp; - - sctp = ctp; - - pkpv = __polyvec_frombytes(pkp); - - i = 0; - pkp += KYBER_POLYVECBYTES; - while (i < KYBER_SYMBYTES/8) - { - t64 = (u64)[pkp]; - publicseed.[u64 8*(int)i] = t64; - pkp += 8; - i += 1; - } - - k = _i_poly_frommsg(k, msgp); - - aat = __gen_matrix(publicseed, 1); - - nonce = 0; - sp[0:KYBER_N] = _poly_getnoise(sp[0:KYBER_N], noiseseed, nonce); - nonce = 1; - sp[KYBER_N:KYBER_N] = _poly_getnoise(sp[KYBER_N:KYBER_N], noiseseed, nonce); - nonce = 2; - sp[2*KYBER_N:KYBER_N] = _poly_getnoise(sp[2*KYBER_N:KYBER_N], noiseseed, nonce); - - nonce = 3; - ep[0:KYBER_N] = _poly_getnoise(ep[0:KYBER_N], noiseseed, nonce); - nonce = 4; - ep[KYBER_N:KYBER_N] = _poly_getnoise(ep[KYBER_N:KYBER_N], noiseseed, nonce); - nonce = 5; - ep[2*KYBER_N:KYBER_N] = _poly_getnoise(ep[2*KYBER_N:KYBER_N], noiseseed, nonce); - - nonce = 6; - epp = _poly_getnoise(epp, noiseseed, nonce); - - sp = __polyvec_ntt(sp); - - bp[0:KYBER_N] = __polyvec_pointwise_acc(aat[0:KYBER_VECN], sp); - bp[KYBER_N:KYBER_N]= __polyvec_pointwise_acc(aat[KYBER_VECN:KYBER_VECN], sp); - bp[2*KYBER_N:KYBER_N] = __polyvec_pointwise_acc(aat[2*KYBER_VECN:KYBER_VECN], sp); - - v = __polyvec_pointwise_acc(pkpv, sp); - - bp = __polyvec_invntt(bp); - v = _poly_invntt(v); - - bp = __polyvec_add2(bp, ep); - v = _poly_add2(v, epp); - v = _poly_add2(v, k); - bp = __polyvec_reduce(bp); - v = __poly_reduce(v); - - ctp = sctp; - ctp[0:KYBER_POLYVECCOMPRESSEDBYTES] = __i_polyvec_compress(ctp[0:KYBER_POLYVECCOMPRESSEDBYTES], bp); - ctp[KYBER_POLYVECCOMPRESSEDBYTES:KYBER_POLYCOMPRESSEDBYTES], v = _i_poly_compress(ctp[KYBER_POLYVECCOMPRESSEDBYTES:KYBER_POLYCOMPRESSEDBYTES], v); - - return ctp; -} - - -inline -fn __indcpa_dec(reg ptr u8[KYBER_MSGBYTES] msgp, reg u64 ctp, reg u64 skp) -> reg ptr u8[KYBER_N/8] -{ - stack u16[KYBER_N] t v mp; - stack u16[KYBER_VECN] bp skpv; - - bp = __polyvec_decompress(ctp); - ctp += KYBER_POLYVECCOMPRESSEDBYTES; - v = _poly_decompress(v, ctp); - - skpv = __polyvec_frombytes(skp); - - bp = __polyvec_ntt(bp); - t = __polyvec_pointwise_acc(skpv, bp); - t = _poly_invntt(t ); - - mp = _poly_sub(mp, v, t); - mp = __poly_reduce(mp); - - msgp, mp = _i_poly_tomsg(msgp, mp); - - return msgp; -} diff --git a/code/jasmin/ref/jbench.sh b/code/jasmin/ref/jbench.sh deleted file mode 100755 index c7b01104..00000000 --- a/code/jasmin/ref/jbench.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/bash -#exec compile.bench - -ulimit -s 50000 - -rm compile.bench - -for arg in -until_typing -until_cstexp -until_inline -until_rmfunc -until_unroll -until_splitting -until_valloc -until_vallocd -until_vshare -until_vshared -until_arrexp -until_rmarrinit -until_rmglobals -until_arrexp -until_makeref -until_lowering -until_stkalloc -until_ralloc -until_rallocd -until_linear -until_asm; do - - echo "=====================================================" >> compile.bench - echo "===== Benchmark with flag $arg" >> compile.bench - echo "=====================================================" >> compile.bench - - make clean - export JADDFLAGS=$arg - #(time make jindcpa.s) 2>compile.bench - (time make jpolyvec.s 2>&1) 2>>compile.bench -done - - diff --git a/code/jasmin/ref/jfips202.jazz b/code/jasmin/ref/jfips202.jazz deleted file mode 100644 index f0e0040f..00000000 --- a/code/jasmin/ref/jfips202.jazz +++ /dev/null @@ -1,103 +0,0 @@ -require "params.jinc" -require "fips202.jinc" - -export fn shake256_128_33_jazz(reg u64 outp inp) -{ - stack u8[33] in; - stack u8[128] out; - stack u64 soutp; - reg u8 c; - inline int i; - - for i = 0 to 33 { - c = (u8)[inp + i]; - in[i] = c; - } - - soutp = outp; - out = _shake256_128_33(out, in); - outp = soutp; - - for i = 0 to 128 { - c = out[i]; - (u8)[outp + i] = c; - } -} - -export fn sha3512_32_jazz(reg u64 outp inp) -{ - stack u8[32] in; - stack u8[64] out; - stack u64 soutp; - reg u8 c; - inline int i; - - for i = 0 to 32 { - c = (u8)[inp + i]; - in[i] = c; - } - - soutp = outp; - out = _sha3512_32(out, in); - outp = soutp; - for i = 0 to 64 { - c = out[i]; - (u8)[outp + i] = c; - } -} - - -export fn shake128_absorb34_jazz(reg u64 statep, reg u64 inp) -{ - stack u64[25] state; - stack u8[34] in; - reg u8 c; - reg u64 t; - inline int i; - - for i = 0 to 34 { - c = (u8)[inp + i]; - in[i] = c; - } - - state = _shake128_absorb34(state, in); - - for i = 0 to 25 { - t = state[i]; - [statep + 8*i] = t; - } -} - -export fn shake128_squeezeblock_jazz(reg u64 outp, reg u64 statep) -{ - stack u64[25] state; - stack u8[SHAKE128_RATE] out; - reg u8 c; - reg u64 t; - inline int i; - stack u64 soutp; - stack u64 sstatep; - - for i = 0 to 25 { - t = [statep + 8*i]; - state[i] = t; - } - - soutp = outp; - sstatep = statep; - - state, out = _shake128_squeezeblock(state, out); - - outp = soutp; - statep = sstatep; - - for i = 0 to 25 { - t = state[i]; - [statep + 8*i] = t; - } - - for i = 0 to SHAKE128_RATE { - c = out[i]; - (u8)[outp + i] = c; - } -} diff --git a/code/jasmin/ref/jindcpa.jazz b/code/jasmin/ref/jindcpa.jazz deleted file mode 100644 index a65f62d4..00000000 --- a/code/jasmin/ref/jindcpa.jazz +++ /dev/null @@ -1,195 +0,0 @@ -require "params.jinc" -require "poly.jinc" -require "polyvec.jinc" -require "gen_matrix.jinc" - - -export fn indcpa_keypair_jazz(reg u64 pkp, reg u64 skp, reg u64 randomnessp) -{ - stack u16[KYBER_K * KYBER_VECN] a; - stack u16[KYBER_VECN] e pkpv skpv; - stack u8[64] buf; - stack u8[KYBER_SYMBYTES] publicseed noiseseed; - stack u8[32] inbuf; - reg u8 c; - reg u64 zero; - reg u8 nonce; - reg u64 i, j; - - stack u64 spkp; - stack u64 sskp; - - spkp = pkp; - sskp = skp; - - i = 0; - while (i < KYBER_SYMBYTES) - { - c = (u8)[randomnessp + i]; - inbuf[(int) i] = c; - i += 1; - } - - buf = _sha3512_32(buf, inbuf); - - i = 0; - j = KYBER_SYMBYTES; - while (i < KYBER_SYMBYTES) - { - c = buf[(int)i]; - publicseed[(int)i] = c; - c = buf[(int)j]; - noiseseed[(int)i] = c; - i += 1; - j += 1; - } - - zero = 0; - a = __gen_matrix(publicseed, zero); - - nonce = 0; - skpv[0:KYBER_N] = _poly_getnoise(skpv[0:KYBER_N], noiseseed, nonce); - nonce = 1; - skpv[KYBER_N:KYBER_N] = _poly_getnoise(skpv[KYBER_N:KYBER_N], noiseseed, nonce); - nonce = 2; - skpv[2*KYBER_N:KYBER_N] = _poly_getnoise(skpv[2*KYBER_N:KYBER_N], noiseseed, nonce); - - nonce = 3; - e[0:KYBER_N] = _poly_getnoise(e[0:KYBER_N], noiseseed, nonce); - nonce = 4; - e[KYBER_N:KYBER_N] = _poly_getnoise(e[KYBER_N:KYBER_N], noiseseed, nonce); - nonce = 5; - e[2*KYBER_N:KYBER_N] = _poly_getnoise(e[2*KYBER_N:KYBER_N], noiseseed, nonce); - - skpv = __polyvec_ntt(skpv); - e = __polyvec_ntt(e); - - pkpv[0:KYBER_N] = __polyvec_pointwise_acc(a[0:KYBER_VECN], skpv); - pkpv[0:KYBER_N] = _poly_frommont(pkpv[0:KYBER_N]); - pkpv[KYBER_N:KYBER_N] = __polyvec_pointwise_acc(a[KYBER_VECN:KYBER_VECN], skpv); - pkpv[KYBER_N:KYBER_N] = _poly_frommont(pkpv[KYBER_N:KYBER_N]); - pkpv[2*KYBER_N:KYBER_N] = __polyvec_pointwise_acc(a[2*KYBER_VECN:KYBER_VECN], skpv); - pkpv[2*KYBER_N:KYBER_N] = _poly_frommont(pkpv[2*KYBER_N:KYBER_N]); - - pkpv = __polyvec_add2(pkpv, e); - pkpv = __polyvec_reduce(pkpv); - - pkp = spkp; - skp = sskp; - - __polyvec_tobytes(skp, skpv); - __polyvec_tobytes(pkp, pkpv); - - i = 0; - pkp += KYBER_POLYVECBYTES; - while (i < KYBER_SYMBYTES) - { - c = publicseed[(int)i]; - (u8)[pkp] = c; - pkp += 1; - i += 1; - } -} - - -export fn indcpa_enc_jazz(reg u64 ctp, reg u64 msgp, reg u64 pkp, reg u64 coinsp) -{ - stack u16[KYBER_K * KYBER_VECN] at; - stack u16[KYBER_VECN] pkpv sp ep bp; - stack u16[KYBER_N] k poly epp v; - stack u8[KYBER_SYMBYTES] publicseed; - stack u8[KYBER_SYMBYTES] noiseseed; - reg u64 i j one; - reg u16 t; - reg u8 c nonce; - stack u64 sctp; - - sctp = ctp; - - i = 0; - while (i < KYBER_SYMBYTES) - { - c = (u8)[coinsp+i]; - noiseseed[(int)i] = c; - i += 1; - } - - pkpv = __polyvec_frombytes(pkp); - - i = 0; - pkp += KYBER_POLYVECBYTES; - while (i < KYBER_SYMBYTES) - { - c = (u8)[pkp]; - publicseed[(int)i] = c; - pkp += 1; - i += 1; - } - - k = _poly_frommsg(k, msgp); - - one = 1; - at = __gen_matrix(publicseed, one); - - nonce = 0; - sp[0:KYBER_N] = _poly_getnoise(sp[0:KYBER_N], noiseseed, nonce); - nonce = 1; - sp[KYBER_N:KYBER_N] = _poly_getnoise(sp[KYBER_N:KYBER_N], noiseseed, nonce); - nonce = 2; - sp[2*KYBER_N:KYBER_N] = _poly_getnoise(sp[2*KYBER_N:KYBER_N], noiseseed, nonce); - - nonce = 3; - ep[0:KYBER_N] = _poly_getnoise(ep[0:KYBER_N], noiseseed, nonce); - nonce = 4; - ep[KYBER_N:KYBER_N] = _poly_getnoise(ep[KYBER_N:KYBER_N], noiseseed, nonce); - nonce = 5; - ep[2*KYBER_N:KYBER_N] = _poly_getnoise(ep[2*KYBER_N:KYBER_N], noiseseed, nonce); - - nonce = 6; - epp = _poly_getnoise(epp, noiseseed, nonce); - - sp = __polyvec_ntt(sp); - - bp[0:KYBER_N] = __polyvec_pointwise_acc(at[0:KYBER_VECN], sp); - bp[KYBER_N:KYBER_N] = __polyvec_pointwise_acc(at[KYBER_VECN:KYBER_VECN], sp); - bp[2*KYBER_N:KYBER_N] = __polyvec_pointwise_acc(at[2*KYBER_VECN:KYBER_VECN], sp); - - v = __polyvec_pointwise_acc(pkpv, sp); - - bp = __polyvec_invntt(bp); - v = _poly_invntt(v); - - bp = __polyvec_add2(bp, ep); - v = _poly_add2(v, epp); - v = _poly_add2(v, k); - bp = __polyvec_reduce(bp); - v = __poly_reduce(v); - - ctp = sctp; - __polyvec_compress(ctp, bp); - ctp += KYBER_POLYVECCOMPRESSEDBYTES; - v = _poly_compress(ctp, v); -} - - - -export fn indcpa_dec_jazz(reg u64 msgp, reg u64 ctp, reg u64 skp) -{ - stack u16[KYBER_N] t v mp; - stack u16[KYBER_VECN] bp skpv; - - bp = __polyvec_decompress(ctp); - ctp += KYBER_POLYVECCOMPRESSEDBYTES; - v = _poly_decompress(v, ctp); - - skpv = __polyvec_frombytes(skp); - - bp = __polyvec_ntt(bp); - t = __polyvec_pointwise_acc(skpv, bp); - t = _poly_invntt(t ); - - mp = _poly_sub(mp, v, t); - mp = __poly_reduce(mp); - - mp = _poly_tomsg(msgp, mp); -} diff --git a/code/jasmin/ref/jkem.jazz b/code/jasmin/ref/jkem.jazz deleted file mode 100644 index cb770b60..00000000 --- a/code/jasmin/ref/jkem.jazz +++ /dev/null @@ -1,86 +0,0 @@ -require "kem.jinc" - -export fn jade_kem_kyber_kyber768_amd64_ref_keypair_derand(reg u64 public_key secret_key fixedrand) -> reg u64 -{ - reg u64 r; - stack u8[KYBER_SYMBYTES*2] randomness; - reg ptr u8[KYBER_SYMBYTES*2] randomnessp; - inline int i; - - public_key = public_key; - secret_key = secret_key; - - for i = 0 to KYBER_SYMBYTES*2 - { - randomness[i] = (u8)[fixedrand + i]; - } - - randomnessp = randomness; - __crypto_kem_keypair_jazz(public_key, secret_key, randomnessp); - ?{}, r = #set0(); - return r; -} - -export fn jade_kem_kyber_kyber768_amd64_ref_enc_derand(reg u64 ciphertext shared_secret public_key fixedrand) -> reg u64 -{ - reg u64 r; - stack u8[KYBER_SYMBYTES] randomness; - reg ptr u8[KYBER_SYMBYTES] randomnessp; - inline int i; - - ciphertext = ciphertext; - shared_secret = shared_secret; - public_key = public_key; - - for i = 0 to KYBER_SYMBYTES*2 { - randomness[i] = (u8)[fixedrand + i]; - } - - randomnessp = randomness; - __crypto_kem_enc_jazz(ciphertext, shared_secret, public_key, randomnessp); - ?{}, r = #set0(); - return r; -} - - -export fn jade_kem_kyber_kyber768_amd64_ref_keypair(reg u64 public_key secret_key) -> reg u64 -{ - reg u64 r; - stack u8[KYBER_SYMBYTES*2] randomness; - reg ptr u8[KYBER_SYMBYTES*2] randomnessp; - - public_key = public_key; - secret_key = secret_key; - - randomnessp = randomness; - randomnessp = #randombytes(randomnessp); - __crypto_kem_keypair_jazz(public_key, secret_key, randomnessp); - ?{}, r = #set0(); - return r; -} - -export fn jade_kem_kyber_kyber768_amd64_ref_enc(reg u64 ciphertext shared_secret public_key) -> reg u64 -{ - reg u64 r; - stack u8[KYBER_SYMBYTES] randomness; - reg ptr u8[KYBER_SYMBYTES] randomnessp; - - ciphertext = ciphertext; - shared_secret = shared_secret; - public_key = public_key; - - randomnessp = randomness; - randomnessp = #randombytes(randomnessp); - __crypto_kem_enc_jazz(ciphertext, shared_secret, public_key, randomnessp); - ?{}, r = #set0(); - return r; -} - -export fn jade_kem_kyber_kyber768_amd64_ref_dec(reg u64 shared_secret ciphertext secret_key) -> reg u64 -{ - reg u64 r; - __crypto_kem_dec_jazz(shared_secret, ciphertext, secret_key); - ?{}, r = #set0(); - return r; -} - diff --git a/code/jasmin/ref/jpoly.jazz b/code/jasmin/ref/jpoly.jazz deleted file mode 100644 index 7a855d30..00000000 --- a/code/jasmin/ref/jpoly.jazz +++ /dev/null @@ -1,288 +0,0 @@ -require "params.jinc" -require "poly.jinc" - -/* These exported functions are just for unit testing */ - -export fn poly_compress_jazz(reg u64 rp, reg u64 ap) -{ - inline int i; - reg u16 t; - reg u8 c; - stack u16[KYBER_N] a; - stack u8[128] r; - - for i = 0 to KYBER_N { - t = (u16)[ap + 2*i]; - a[i] = t; - } - - a = _poly_compress(rp, a); -} - -export fn poly_decompress_jazz(reg u64 rp, reg u64 ap) -{ - inline int i; - reg u16 t; - reg u8 c; - stack u16[KYBER_N] r; - stack u8[128] a; - - r = _poly_decompress(r, ap); - - for i = 0 to KYBER_N { - t = r[i]; - (u16)[rp + 2*i] = t; - } -} - -export fn poly_tobytes_jazz(reg u64 rp, reg u64 ap) -{ - inline int i; - reg u16 t; - reg u8 c; - stack u16[KYBER_N] a; - - for i = 0 to KYBER_N { - t = (u16)[ap + 2*i]; - a[i] = t; - } - - a = _poly_tobytes(rp, a); -} - -export fn poly_frombytes_jazz(reg u64 rp, reg u64 ap) -{ - inline int i; - reg u16 t; - reg u8 c; - stack u16[KYBER_N] r; - - r = _poly_frombytes(r, ap); - - for i = 0 to KYBER_N { - t = r[i]; - (u16)[rp + 2*i] = t; - } -} - -export fn poly_tomsg_jazz(reg u64 rp, reg u64 ap) -{ - inline int i; - reg u16 t; - reg u8 c; - stack u16[KYBER_N] a; - stack u8[32] r; - - for i = 0 to KYBER_N { - t = (u16)[ap + 2*i]; - a[i] = t; - } - - a = _poly_tomsg(rp, a); -} - -export fn poly_frommsg_jazz(reg u64 rp, reg u64 ap) -{ - inline int i; - reg u16 t; - reg u8 c; - stack u16[KYBER_N] r; - - r = _poly_frommsg(r, ap); - - for i = 0 to KYBER_N { - t = r[i]; - (u16)[rp + 2*i] = t; - } -} - - - -export fn poly_add2_jazz(reg u64 rp, reg u64 bp) -{ - inline int i; - reg u16 t; - stack u16[KYBER_N] r; - stack u16[KYBER_N] b; - - for i = 0 to KYBER_N { - t = (u16)[rp + 2*i]; - r[i] = t; - t = (u16)[bp + 2*i]; - b[i] = t; - } - - r = _poly_add2(r, b); - - for i = 0 to KYBER_N { - t = r[i]; - (u16)[rp + 2*i] = t; - } -} - -export fn poly_sub_jazz(reg u64 rp, reg u64 ap, reg u64 bp) -{ - inline int i; - reg u16 t; - stack u16[KYBER_N] a; - stack u16[KYBER_N] b; - stack u16[KYBER_N] r; - - for i = 0 to KYBER_N { - t = (u16)[ap + 2*i]; - a[i] = t; - t = (u16)[bp + 2*i]; - b[i] = t; - } - - r = _poly_sub(r, a, b); - - for i = 0 to KYBER_N { - t = r[i]; - (u16)[rp + 2*i] = t; - } -} - -export fn poly_ntt_jazz(reg u64 rp) -{ - inline int i; - reg u16 t; - stack u16[KYBER_N] r; - - for i = 0 to KYBER_N { - t = (u16)[rp + 2*i]; - r[i] = t; - } - - r = _poly_ntt(r); - - for i = 0 to KYBER_N { - t = r[i]; - (u16)[rp + 2*i] = t; - } -} - -export fn poly_invntt_jazz(reg u64 rp) -{ - inline int i; - reg u16 t; - stack u16[KYBER_N] r; - - for i = 0 to KYBER_N { - t = (u16)[rp + 2*i]; - r[i] = t; - } - - r = _poly_invntt(r); - - for i = 0 to KYBER_N { - t = r[i]; - (u16)[rp + 2*i] = t; - } -} - -export fn poly_basemul_jazz(reg u64 rp, reg u64 ap, reg u64 bp) -{ - inline int i; - reg u16 t; - stack u16[KYBER_N] a; - stack u16[KYBER_N] b; - stack u16[KYBER_N] r; - - for i = 0 to KYBER_N { - t = (u16)[ap + 2*i]; - a[i] = t; - t = (u16)[bp + 2*i]; - b[i] = t; - } - - r[0] = 0; - r = _poly_basemul(r, a, b); - - for i = 0 to KYBER_N { - t = r[i]; - (u16)[rp + 2*i] = t; - } -} - -export fn poly_frommont_jazz(reg u64 rp) -{ - inline int i; - reg u16 t; - stack u16[KYBER_N] r; - - for i = 0 to KYBER_N { - t = (u16)[rp + 2*i]; - r[i] = t; - } - - r = _poly_frommont(r); - - for i = 0 to KYBER_N { - t = r[i]; - (u16)[rp + 2*i] = t; - } -} - -export fn poly_getnoise_jazz(reg u64 rp, reg u64 seedp, reg u8 nonce) -{ - reg u8 d; - reg u16 t; - stack u16[KYBER_N] r; - stack u8[KYBER_SYMBYTES] seed; - inline int i; - stack u64 srp; - - srp = rp; - - for i = 0 to KYBER_SYMBYTES { - d = (u8)[seedp + i]; - seed[i] = d; - } - - r = _poly_getnoise(r, seed, nonce); - - rp = srp; - for i = 0 to KYBER_N { - t = r[i]; - (u16)[rp + 2*i] = t; - } -} - -export fn poly_reduce_jazz(reg u64 rp) -{ - inline int i; - reg u16 t; - stack u16[KYBER_N] r; - - for i = 0 to KYBER_N { - t = (u16)[rp + 2*i]; - r[i] = t; - } - - r = __poly_reduce(r); - - for i = 0 to KYBER_N { - t = r[i]; - (u16)[rp + 2*i] = t; - } -} - -export fn poly_csubq_jazz(reg u64 rp) -{ - inline int i; - reg u16 t; - stack u16[KYBER_N] r; - - for i = 0 to KYBER_N { - t = (u16)[rp + 2*i]; - r[i] = t; - } - - r = _poly_csubq(r); - - for i = 0 to KYBER_N { - t = r[i]; - (u16)[rp + 2*i] = t; - } -} diff --git a/code/jasmin/ref/jpolyvec.jazz b/code/jasmin/ref/jpolyvec.jazz deleted file mode 100644 index 7163b70f..00000000 --- a/code/jasmin/ref/jpolyvec.jazz +++ /dev/null @@ -1,189 +0,0 @@ -require "params.jinc" -require "polyvec.jinc" - -/* These exported functions are just for unit testing */ - -export fn polyvec_tobytes_jazz(reg u64 rp, reg u64 ap) -{ - inline int i; - reg u16 t; - reg u8 c; - stack u16[KYBER_VECN] a; - - for i = 0 to KYBER_VECN { - t = (u16)[ap + 2*i]; - a[i] = t; - } - - __polyvec_tobytes(rp, a); -} - - -export fn polyvec_decompress_jazz(reg u64 rp, reg u64 ap) -{ - inline int i; - reg u16 t; - reg u8 c; - stack u16[KYBER_VECN] r; - - r = __polyvec_decompress(ap); - - for i = 0 to KYBER_VECN { - t = r[i]; - (u16)[rp + 2*i] = t; - } -} - - -export fn polyvec_compress_jazz(reg u64 rp, reg u64 ap) -{ - inline int i; - reg u16 t; - reg u8 c; - stack u16[KYBER_VECN] a; - - for i = 0 to KYBER_VECN { - t = (u16)[ap + 2*i]; - a[i] = t; - } - - __polyvec_compress(rp, a); -} - - -export fn polyvec_frombytes_jazz(reg u64 rp, reg u64 ap) -{ - inline int i; - reg u16 t; - reg u8 c; - stack u16[KYBER_VECN] r; - - r = __polyvec_frombytes(ap); - - for i = 0 to KYBER_VECN { - t = r[i]; - (u16)[rp + 2*i] = t; - } -} - -export fn polyvec_add2_jazz(reg u64 rp, reg u64 bp) -{ - inline int i; - reg u16 t; - stack u16[KYBER_VECN] a; - stack u16[KYBER_VECN] b; - stack u16[KYBER_VECN] r; - - for i = 0 to KYBER_VECN { - t = (u16)[rp + 2*i]; - a[i] = t; - t = (u16)[bp + 2*i]; - b[i] = t; - } - - r = __polyvec_add2(a, b); - - for i = 0 to KYBER_VECN { - t = r[i]; - (u16)[rp + 2*i] = t; - } -} - -export fn polyvec_pointwise_acc_jazz(reg u64 rp, reg u64 ap, reg u64 bp) -{ - inline int i; - reg u16 t; - stack u16[KYBER_VECN] a; - stack u16[KYBER_VECN] b; - stack u16[KYBER_N] r; - - for i = 0 to KYBER_VECN { - t = (u16)[ap + 2*i]; - a[i] = t; - t = (u16)[bp + 2*i]; - b[i] = t; - } - - r = __polyvec_pointwise_acc(a, b); - - for i = 0 to KYBER_N { - t = r[i]; - (u16)[rp + 2*i] = t; - } -} - -export fn polyvec_ntt_jazz(reg u64 rp) -{ - inline int i; - reg u16 t; - stack u16[KYBER_VECN] r; - - for i = 0 to KYBER_VECN { - t = (u16)[rp + 2*i]; - r[i] = t; - } - - r = __polyvec_ntt(r); - - for i = 0 to KYBER_VECN { - t = r[i]; - (u16)[rp + 2*i] = t; - } -} - -export fn polyvec_invntt_jazz(reg u64 rp) -{ - inline int i; - reg u16 t; - stack u16[KYBER_VECN] r; - - for i = 0 to KYBER_VECN { - t = (u16)[rp + 2*i]; - r[i] = t; - } - - r = __polyvec_invntt(r); - - for i = 0 to KYBER_VECN { - t = r[i]; - (u16)[rp + 2*i] = t; - } -} - -export fn polyvec_csubq_jazz(reg u64 rp) -{ - inline int i; - reg u16 t; - stack u16[KYBER_VECN] r; - - for i = 0 to KYBER_VECN { - t = (u16)[rp + 2*i]; - r[i] = t; - } - - r = __polyvec_csubq(r); - - for i = 0 to KYBER_VECN { - t = r[i]; - (u16)[rp + 2*i] = t; - } -} - -export fn polyvec_reduce_jazz(reg u64 rp) -{ - inline int i; - reg u16 t; - stack u16[KYBER_VECN] r; - - for i = 0 to KYBER_VECN { - t = (u16)[rp + 2*i]; - r[i] = t; - } - - r = __polyvec_reduce(r); - - for i = 0 to KYBER_VECN { - t = r[i]; - (u16)[rp + 2*i] = t; - } -} diff --git a/code/jasmin/ref/kem.c b/code/jasmin/ref/kem.c deleted file mode 100644 index bc3471bc..00000000 --- a/code/jasmin/ref/kem.c +++ /dev/null @@ -1,141 +0,0 @@ -#include -#include -#include -#include "kem.h" -#include "indcpa.h" -#include "symmetric.h" - -/************************************************* -* Name: verify -* -* Description: Compare two arrays for equality in constant time. -* -* Arguments: const uint8_t *a: pointer to first byte array -* const uint8_t *b: pointer to second byte array -* size_t len: length of the byte arrays -* -* Returns 0 if the byte arrays are equal, 1 otherwise -**************************************************/ -uint64_t verify(const uint8_t *a, const uint8_t *b, size_t len) -{ - size_t i; - uint8_t r; - - r = 0; - for(i=0; i < len; i ++) - r |= a[i] ^ b[i]; - - return (-(uint64_t)r) >> 63; -} - -/************************************************* -* Name: cmov -* -* Description: Copy len bytes from x to r if b is 1; -* don't modify x if b is 0. Requires b to be in {0,1}; -* assumes two's complement representation of negative integers. -* Runs in constant time. -* -* Arguments: uint8_t *r: pointer to output byte array -* const uint8_t *x: pointer to input byte array -* size_t len: Amount of bytes to be copied -* uint8_t b: Condition bit; has to be in {0,1} -**************************************************/ -void cmov(uint8_t *r, const uint8_t *x, size_t len, uint8_t b) -{ - size_t i; - - b = -b; - for(i=0; i < len; i++) - r[i] ^= b & (r[i] ^ x[i]); -} - -/************************************************* -* Name: crypto_kem_keypair -* -* Description: Generates public and private key for the CCA-secure -* Kyber key encapsulation mechanism -* -* Arguments: - unsigned char *pk: pointer to output public key -* - unsigned char *sk: pointer to output private key -**************************************************/ -void crypto_kem_keypair(unsigned char *pk, - unsigned char *sk, - const unsigned char *randomness) -{ - indcpa_keypair(pk, sk, randomness); - - memcpy(sk+KYBER_INDCPA_SECRETKEYBYTES, pk, KYBER_INDCPA_PUBLICKEYBYTES); - - hash_h(sk+KYBER_SECRETKEYBYTES-2*KYBER_SYMBYTES, pk, KYBER_PUBLICKEYBYTES); - - memcpy(sk+KYBER_SECRETKEYBYTES-KYBER_SYMBYTES, randomness + KYBER_SYMBYTES, KYBER_SYMBYTES); -} - -/************************************************* -* Name: crypto_kem_enc -* -* Description: Generates cipher text and shared -* secret for given public key -* -* Arguments: - unsigned char *c: pointer to output ciphertext (of length KYBER_INDCPA_BYTES bytes) -* - const unsigned char *m: pointer to input message (of length KYBER_INDCPA_MSGBYTES bytes) -* - const unsigned char *pk: pointer to input public key (of length KYBER_INDCPA_PUBLICKEYBYTES bytes) -* - const unsigned char *coin: pointer to input random coins used as seed (of length KYBER_SYMBYTES bytes) -* to deterministically generate all randomness -**************************************************/ -void crypto_kem_enc(unsigned char *ct, - unsigned char *ss, - const unsigned char *pk, - const unsigned char *coins) -{ - uint8_t buf[2*KYBER_SYMBYTES]; - uint8_t kr[2*KYBER_SYMBYTES]; - - hash_h(buf, coins, KYBER_SYMBYTES); - - hash_h(buf + KYBER_SYMBYTES, pk, KYBER_PUBLICKEYBYTES); - hash_g(kr, buf, 2*KYBER_SYMBYTES); - - indcpa_enc(ct, buf, pk, kr+KYBER_SYMBYTES); - - hash_h(kr+KYBER_SYMBYTES, ct, KYBER_CIPHERTEXTBYTES); - - kdf(ss, kr, 2*KYBER_SYMBYTES); -} - -/************************************************* -* Name: crypto_kem_dec -* -* Description: Generates shared secret for given -* cipher text and private key -* -* Arguments: - unsigned char *m: pointer to output decrypted message (of length KYBER_INDCPA_MSGBYTES) -* - const unsigned char *c: pointer to input ciphertext (of length KYBER_INDCPA_BYTES) -* - const unsigned char *sk: pointer to input secret key (of length KYBER_INDCPA_SECRETKEYBYTES) -**************************************************/ -void crypto_kem_dec(uint8_t *ss, - const uint8_t *ct, - const uint8_t *sk) -{ - uint8_t buf[2*KYBER_SYMBYTES]; - uint8_t kr[2*KYBER_SYMBYTES]; - uint8_t cmp[KYBER_CIPHERTEXTBYTES]; - uint64_t cnd; - const uint8_t *pk = sk + KYBER_INDCPA_SECRETKEYBYTES; - - indcpa_dec(buf, ct, sk); - - memcpy(buf+KYBER_SYMBYTES, sk+KYBER_SECRETKEYBYTES-2*KYBER_SYMBYTES, KYBER_SYMBYTES); - hash_g(kr, buf, 2*KYBER_SYMBYTES); - - indcpa_enc(cmp, buf, pk, kr+KYBER_SYMBYTES); - - cnd = verify(ct, cmp, KYBER_CIPHERTEXTBYTES); - - hash_h(kr+KYBER_SYMBYTES, ct, KYBER_CIPHERTEXTBYTES); - - cmov(kr, sk+KYBER_SECRETKEYBYTES-KYBER_SYMBYTES, KYBER_SYMBYTES, cnd); - - kdf(ss, kr, 2*KYBER_SYMBYTES); -} diff --git a/code/jasmin/ref/kem.h b/code/jasmin/ref/kem.h deleted file mode 100644 index 8aa49391..00000000 --- a/code/jasmin/ref/kem.h +++ /dev/null @@ -1,41 +0,0 @@ -#ifndef CRYPTO_KEM_H -#define CRYPTO_KEM_H - -#include - -void crypto_kem_keypair(unsigned char *pk, - unsigned char *sk, - const unsigned char *randomness); - -void crypto_kem_enc(unsigned char *c, - unsigned char *m, - const unsigned char *pk, - const unsigned char *coins); - -void crypto_kem_dec(unsigned char *m, - const unsigned char *c, - const unsigned char *sk); - -void jade_kem_kyber_kyber768_amd64_ref_keypair_derand(unsigned char *pk, - unsigned char *sk, - const unsigned char *randomness); - -void jade_kem_kyber_kyber768_amd64_ref_enc_derand(unsigned char *c, - const unsigned char *m, - const unsigned char *pk, - const unsigned char *coins); - - -void jade_kem_kyber_kyber768_amd64_ref_keypair(unsigned char *pk, - unsigned char *sk); - -void jade_kem_kyber_kyber768_amd64_ref_enc(unsigned char *c, - const unsigned char *m, - const unsigned char *pk); - -void jade_kem_kyber_kyber768_amd64_ref_dec(unsigned char *m, - const unsigned char *c, - const unsigned char *sk); - - -#endif diff --git a/code/jasmin/ref/kem.jinc b/code/jasmin/ref/kem.jinc deleted file mode 100644 index c9624a7b..00000000 --- a/code/jasmin/ref/kem.jinc +++ /dev/null @@ -1,143 +0,0 @@ -require "indcpa.jinc" -require "verify.jinc" - -inline -fn __crypto_kem_keypair_jazz(reg u64 pkp, reg u64 skp, reg ptr u8[KYBER_SYMBYTES*2] randomnessp) -{ - stack ptr u8[KYBER_SYMBYTES*2] s_randomnessp; - reg ptr u8[KYBER_SYMBYTES] randomnessp1 randomnessp2; - - stack u8[32] h_pk; - stack u64 s_skp s_pkp; - reg u64 t64; - inline int i; - - s_randomnessp = randomnessp; - s_pkp = pkp; - s_skp = skp; - - randomnessp1 = randomnessp[0:KYBER_SYMBYTES]; - __indcpa_keypair(pkp, skp, randomnessp1); - - skp = s_skp; - skp += KYBER_POLYVECBYTES; - pkp = s_pkp; - - for i=0 to KYBER_INDCPA_PUBLICKEYBYTES/8 - { - t64 = (u64)[pkp + 8*i]; - (u64)[skp] = t64; - skp += 8; - } - - s_skp = skp; - pkp = s_pkp; - t64 = KYBER_POLYVECBYTES + KYBER_SYMBYTES; - h_pk = _isha3_256(h_pk, pkp, t64); - skp = s_skp; - - for i=0 to 4 - { - t64 = h_pk[u64 i]; - (u64)[skp] = t64; - skp += 8; - } - - randomnessp = s_randomnessp; - randomnessp2 = randomnessp[KYBER_SYMBYTES:KYBER_SYMBYTES]; - for i=0 to KYBER_SYMBYTES/8 - { - t64 = randomnessp2[u64 i]; - (u64)[skp] = t64; - skp += 8; - } -} - -inline -fn __crypto_kem_enc_jazz(reg u64 ctp, reg u64 shkp, reg u64 pkp, reg ptr u8[KYBER_SYMBYTES] randomnessp) -{ - inline int i; - - stack u8[KYBER_SYMBYTES * 2] buf kr; - stack u64 s_pkp s_ctp s_shkp; - reg u64 t64; - - s_pkp = pkp; - s_ctp = ctp; - s_shkp = shkp; - - for i=0 to KYBER_SYMBYTES/8 - { - t64 = randomnessp[u64 i]; - kr[u64 i] = t64; - } - - buf[0:KYBER_SYMBYTES] = _isha3_256_32(buf[0:KYBER_SYMBYTES], kr[0:KYBER_SYMBYTES]); - - pkp = s_pkp; - - t64 = KYBER_PUBLICKEYBYTES; - buf[KYBER_SYMBYTES:KYBER_SYMBYTES] = _isha3_256(buf[KYBER_SYMBYTES:KYBER_SYMBYTES], pkp, t64); - - kr = _sha3_512_64(kr, buf); - - pkp = s_pkp; - - __indcpa_enc(s_ctp, buf[0:KYBER_SYMBYTES], pkp, kr[KYBER_SYMBYTES:KYBER_SYMBYTES]); - - ctp = s_ctp; - t64 = KYBER_CT_LEN; - kr[KYBER_SYMBYTES:KYBER_SYMBYTES] = _isha3_256(kr[KYBER_SYMBYTES:KYBER_SYMBYTES], ctp, t64); - - shkp = s_shkp; - t64 = KYBER_SSBYTES; - _shake256_64(shkp, t64, kr); -} - -inline -fn __crypto_kem_dec_jazz(reg u64 shkp, reg u64 ctp, reg u64 skp) -{ - stack u8[KYBER_CT_LEN] ctpc; - stack u8[2*KYBER_SYMBYTES] kr buf; - stack u64 s_skp s_ctp s_shkp; - reg u64 pkp hp zp t64 cnd; - inline int i; - - s_shkp = shkp; - s_ctp = ctp; - - buf[0:KYBER_MSGBYTES] = __indcpa_dec(buf[0:KYBER_MSGBYTES], ctp, skp); - - hp = skp + 32; - hp += 24 * KYBER_K * KYBER_N>>3; - - for i=0 to KYBER_SYMBYTES/8 - { - t64 = (u64)[hp + 8*i]; - buf.[u64 KYBER_SYMBYTES + 8*i] = t64; - } - - s_skp = skp; - - kr = _sha3_512_64(kr, buf); - - pkp = s_skp; - pkp += 12 * KYBER_K * KYBER_N>>3; - - ctpc = __iindcpa_enc(ctpc, buf[0:KYBER_SYMBYTES], pkp, kr[KYBER_SYMBYTES:KYBER_SYMBYTES]); - - ctp = s_ctp; - cnd = __verify(ctp, ctpc); - - zp = s_skp; - zp += 64; - zp += 24 * KYBER_K * KYBER_N>>3; - kr[0:KYBER_SYMBYTES] = __cmov(kr[0:KYBER_SYMBYTES], zp, cnd); - - t64 = KYBER_CT_LEN; - kr[KYBER_SYMBYTES:KYBER_SYMBYTES] = _isha3_256(kr[KYBER_SYMBYTES:KYBER_SYMBYTES], ctp, t64); - - shkp = s_shkp; - t64 = KYBER_SSBYTES; - _shake256_64(shkp, t64, kr); -} diff --git a/code/jasmin/ref/ntt.c b/code/jasmin/ref/ntt.c deleted file mode 100644 index b706bbcc..00000000 --- a/code/jasmin/ref/ntt.c +++ /dev/null @@ -1,152 +0,0 @@ -#include -#include "params.h" -#include "ntt.h" -#include "reduce.h" - -/* Code to generate zetas and zetas_inv used in the number-theoretic transform: - -#define KYBER_ROOT_OF_UNITY 17 - -static const uint16_t tree[128] = { - 0, 64, 32, 96, 16, 80, 48, 112, 8, 72, 40, 104, 24, 88, 56, 120, - 4, 68, 36, 100, 20, 84, 52, 116, 12, 76, 44, 108, 28, 92, 60, 124, - 2, 66, 34, 98, 18, 82, 50, 114, 10, 74, 42, 106, 26, 90, 58, 122, - 6, 70, 38, 102, 22, 86, 54, 118, 14, 78, 46, 110, 30, 94, 62, 126, - 1, 65, 33, 97, 17, 81, 49, 113, 9, 73, 41, 105, 25, 89, 57, 121, - 5, 69, 37, 101, 21, 85, 53, 117, 13, 77, 45, 109, 29, 93, 61, 125, - 3, 67, 35, 99, 19, 83, 51, 115, 11, 75, 43, 107, 27, 91, 59, 123, - 7, 71, 39, 103, 23, 87, 55, 119, 15, 79, 47, 111, 31, 95, 63, 127}; - - -static int16_t fqmul(int16_t a, int16_t b) { - return montgomery_reduce((int32_t)a*b); -} - -void init_ntt() { - unsigned int i, j, k; - int16_t tmp[128]; - - tmp[0] = MONT; - for(i = 1; i < 128; ++i) - tmp[i] = fqmul(tmp[i-1], KYBER_ROOT_OF_UNITY*MONT % KYBER_Q); - - for(i = 0; i < 128; ++i) - zetas[i] = tmp[tree[i]]; - - k = 0; - for(i = 64; i >= 1; i >>= 1) - for(j = i; j < 2*i; ++j) - zetas_inv[k++] = -tmp[128 - tree[j]]; - - zetas_inv[127] = MONT * (MONT * (KYBER_Q - 1) * ((KYBER_Q - 1)/128) % KYBER_Q) % KYBER_Q; -} - -*/ -int16_t zetas[128] = { - 2285, 2571, 2970, 1812, 1493, 1422, 287, 202, 3158, 622, 1577, 182, 962, 2127, 1855, 1468, - 573, 2004, 264, 383, 2500, 1458, 1727, 3199, 2648, 1017, 732, 608, 1787, 411, 3124, 1758, - 1223, 652, 2777, 1015, 2036, 1491, 3047, 1785, 516, 3321, 3009, 2663, 1711, 2167, 126, 1469, - 2476, 3239, 3058, 830, 107, 1908, 3082, 2378, 2931, 961, 1821, 2604, 448, 2264, 677, 2054, - 2226, 430, 555, 843, 2078, 871, 1550, 105, 422, 587, 177, 3094, 3038, 2869, 1574, 1653, - 3083, 778, 1159, 3182, 2552, 1483, 2727, 1119, 1739, 644, 2457, 349, 418, 329, 3173, 3254, - 817, 1097, 603, 610, 1322, 2044, 1864, 384, 2114, 3193, 1218, 1994, 2455, 220, 2142, 1670, - 2144, 1799, 2051, 794, 1819, 2475, 2459, 478, 3221, 3021, 996, 991, 958, 1869, 1522, 1628}; - -int16_t zetas_inv[128] = { - 1701, 1807, 1460, 2371, 2338, 2333, 308, 108, 2851, 870, 854, 1510, 2535, 1278, 1530, 1185, - 1659, 1187, 3109, 874, 1335, 2111, 136, 1215, 2945, 1465, 1285, 2007, 2719, 2726, 2232, 2512, - 75, 156, 3000, 2911, 2980, 872, 2685, 1590, 2210, 602, 1846, 777, 147, 2170, 2551, 246, - 1676, 1755, 460, 291, 235, 3152, 2742, 2907, 3224, 1779, 2458, 1251, 2486, 2774, 2899, 1103, - 1275, 2652, 1065, 2881, 725, 1508, 2368, 398, 951, 247, 1421, 3222, 2499, 271, 90, 853, - 1860, 3203, 1162, 1618, 666, 320, 8, 2813, 1544, 282, 1838, 1293, 2314, 552, 2677, 2106, - 1571, 205, 2918, 1542, 2721, 2597, 2312, 681, 130, 1602, 1871, 829, 2946, 3065, 1325, 2756, - 1861, 1474, 1202, 2367, 3147, 1752, 2707, 171, 3127, 3042, 1907, 1836, 1517, 359, 758, 1441}; - - -/************************************************* -* Name: fqmul -* -* Description: Multiplication followed by Montgomery reduction -* -* Arguments: - int16_t a: first factor -* - int16_t b: second factor -* -* Returns 16-bit integer congruent to a*b*R^{-1} mod q -**************************************************/ -static int16_t fqmul(int16_t a, int16_t b) { - return montgomery_reduce((int32_t)a*b); -} - -/************************************************* -* Name: ntt -* -* Description: Inplace number-theoretic transform (NTT) in Rq -* input is in standard order, output is in bitreversed order -* -* Arguments: - int16_t r[256]: pointer to input/output vector of elements of Zq -**************************************************/ -void ntt(int16_t r[256]) { - unsigned int len, start, j, k; - int16_t t, zeta; - - k = 1; - for(len = 128; len >= 2; len >>= 1) { - for(start = 0; start < 256; start = j + len) { - zeta = zetas[k++]; - for(j = start; j < start + len; ++j) { - t = fqmul(zeta, r[j + len]); - r[j + len] = r[j] - t; - r[j] = r[j] + t; - } - } - } -} - -/************************************************* -* Name: invntt -* -* Description: Inplace inverse number-theoretic transform in Rq -* input is in bitreversed order, output is in standard order -* -* Arguments: - int16_t r[256]: pointer to input/output vector of elements of Zq -**************************************************/ -void invntt(int16_t r[256]) { - unsigned int start, len, j, k; - int16_t t, zeta; - - k = 0; - for(len = 2; len <= 128; len <<= 1) { - for(start = 0; start < 256; start = j + len) { - zeta = zetas_inv[k++]; - for(j = start; j < start + len; ++j) { - t = r[j]; - r[j] = barrett_reduce(t + r[j + len]); - r[j + len] = t - r[j + len]; - r[j + len] = fqmul(zeta, r[j + len]); - } - } - } - - for(j = 0; j < 256; ++j) - r[j] = fqmul(r[j], zetas_inv[127]); -} - -/************************************************* -* Name: basemul -* -* Description: Multiplication of polynomials in Zq[X]/((X^2-zeta)) -* used for multiplication of elements in Rq in NTT domain -* -* Arguments: - int16_t r[2]: pointer to the output polynomial -* - const int16_t a[2]: pointer to the first factor -* - const int16_t b[2]: pointer to the second factor -* - int16_t zeta: integer defining the reduction polynomial -**************************************************/ -void basemul(int16_t r[2], const int16_t a[2], const int16_t b[2], int16_t zeta) { - r[0] = fqmul(a[1], b[1]); - r[0] = fqmul(r[0], zeta); - r[0] += fqmul(a[0], b[0]); - - r[1] = fqmul(a[0], b[1]); - r[1] += fqmul(a[1], b[0]); -} diff --git a/code/jasmin/ref/ntt.h b/code/jasmin/ref/ntt.h deleted file mode 100644 index f7ad4605..00000000 --- a/code/jasmin/ref/ntt.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef NTT_H -#define NTT_H - -#include - -extern int16_t zetas[128]; -extern int16_t zetas_inv[128]; - -void ntt(int16_t *poly); -void splitntt(int16_t *poly); - -void invntt(int16_t *poly); -void basemul(int16_t r[2], const int16_t a[2], const int16_t b[2], int16_t zeta); - -#endif diff --git a/code/jasmin/ref/params.h b/code/jasmin/ref/params.h deleted file mode 100644 index d26e40fe..00000000 --- a/code/jasmin/ref/params.h +++ /dev/null @@ -1,42 +0,0 @@ -#ifndef PARAMS_H -#define PARAMS_H - -#ifndef KYBER_K -#define KYBER_K 3 /* Change this for different security strengths */ -#endif - -/* Don't change parameters below this line */ - -#define KYBER_N 256 -#define KYBER_Q 3329 - -#define KYBER_ETA 2 - -#define KYBER_SYMBYTES 32 /* size in bytes of hashes, and seeds */ -#define KYBER_SSBYTES 32 /* size in bytes of shared key */ - -#define KYBER_POLYBYTES 384 -#define KYBER_POLYVECBYTES (KYBER_K * KYBER_POLYBYTES) - - -#if KYBER_K == 2 -#define KYBER_POLYCOMPRESSEDBYTES 96 -#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 320) -#elif KYBER_K == 3 -#define KYBER_POLYCOMPRESSEDBYTES 128 -#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 320) -#elif KYBER_K == 4 -#define KYBER_POLYCOMPRESSEDBYTES 160 -#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 352) -#endif - -#define KYBER_INDCPA_MSGBYTES KYBER_SYMBYTES -#define KYBER_INDCPA_PUBLICKEYBYTES (KYBER_POLYVECBYTES + KYBER_SYMBYTES) -#define KYBER_INDCPA_SECRETKEYBYTES (KYBER_POLYVECBYTES) -#define KYBER_INDCPA_BYTES (KYBER_POLYVECCOMPRESSEDBYTES + KYBER_POLYCOMPRESSEDBYTES) - -#define KYBER_PUBLICKEYBYTES (KYBER_INDCPA_PUBLICKEYBYTES) -#define KYBER_SECRETKEYBYTES (KYBER_INDCPA_SECRETKEYBYTES + KYBER_INDCPA_PUBLICKEYBYTES + 2*KYBER_SYMBYTES) /* 32 bytes of additional space to save H(pk) */ -#define KYBER_CIPHERTEXTBYTES KYBER_INDCPA_BYTES - -#endif diff --git a/code/jasmin/ref/params.jinc b/code/jasmin/ref/params.jinc deleted file mode 100644 index 53049172..00000000 --- a/code/jasmin/ref/params.jinc +++ /dev/null @@ -1,16 +0,0 @@ -param int KYBER_Q = 3329; -param int KYBER_N = 256; -param int KYBER_K = 3; -param int KYBER_VECN = KYBER_K * KYBER_N; - -param int KYBER_SYMBYTES = 32; -param int KYBER_ETA = 2; -param int KYBER_POLYBYTES = 384; -param int KYBER_POLYVECBYTES = (KYBER_K * KYBER_POLYBYTES); -param int KYBER_POLYCOMPRESSEDBYTES = 128; -param int KYBER_POLYVECCOMPRESSEDBYTES = (KYBER_K * 320); -param int KYBER_MSGBYTES = KYBER_SYMBYTES; -param int KYBER_CT_LEN = KYBER_POLYVECCOMPRESSEDBYTES + KYBER_POLYCOMPRESSEDBYTES; -param int KYBER_INDCPA_PUBLICKEYBYTES = KYBER_POLYVECBYTES + KYBER_SYMBYTES; -param int KYBER_PUBLICKEYBYTES = KYBER_INDCPA_PUBLICKEYBYTES; -param int KYBER_SSBYTES = 32; diff --git a/code/jasmin/ref/poly.c b/code/jasmin/ref/poly.c deleted file mode 100644 index 417c3c2c..00000000 --- a/code/jasmin/ref/poly.c +++ /dev/null @@ -1,359 +0,0 @@ -#include -#include "params.h" -#include "poly.h" -#include "ntt.h" -#include "reduce.h" -#include "cbd.h" -#include "symmetric.h" - -/************************************************* -* Name: poly_compress -* -* Description: Compression and subsequent serialization of a polynomial -* -* Arguments: - unsigned char *r: pointer to output byte array (needs space for KYBER_POLYCOMPRESSEDBYTES bytes) -* - const poly *a: pointer to input polynomial -**************************************************/ -void poly_compress(unsigned char *r, poly *a) -{ - uint8_t t[8]; - int i,j,k=0; - - poly_csubq(a); - -#if (KYBER_POLYCOMPRESSEDBYTES == 96) - for(i=0;icoeffs[i+j] << 3) + KYBER_Q/2) / KYBER_Q) & 7; - - r[k] = t[0] | (t[1] << 3) | (t[2] << 6); - r[k+1] = (t[2] >> 2) | (t[3] << 1) | (t[4] << 4) | (t[5] << 7); - r[k+2] = (t[5] >> 1) | (t[6] << 2) | (t[7] << 5); - k += 3; - } -#elif (KYBER_POLYCOMPRESSEDBYTES == 128) - for(i=0;icoeffs[i+j] << 4) + KYBER_Q/2) / KYBER_Q) & 15; - - r[k] = t[0] | (t[1] << 4); - r[k+1] = t[2] | (t[3] << 4); - r[k+2] = t[4] | (t[5] << 4); - r[k+3] = t[6] | (t[7] << 4); - k += 4; - } -#elif (KYBER_POLYCOMPRESSEDBYTES == 160) - for(i=0;icoeffs[i+j] << 5) + KYBER_Q/2) / KYBER_Q) & 31; - - r[k] = t[0] | (t[1] << 5); - r[k+1] = (t[1] >> 3) | (t[2] << 2) | (t[3] << 7); - r[k+2] = (t[3] >> 1) | (t[4] << 4); - r[k+3] = (t[4] >> 4) | (t[5] << 1) | (t[6] << 6); - r[k+4] = (t[6] >> 2) | (t[7] << 3); - k += 5; - } -#else -#error "KYBER_POLYCOMPRESSEDBYTES needs to be in {96, 128, 160}" -#endif -} - -/************************************************* -* Name: poly_decompress -* -* Description: De-serialization and subsequent decompression of a polynomial; -* approximate inverse of poly_compress -* -* Arguments: - poly *r: pointer to output polynomial -* - const unsigned char *a: pointer to input byte array (of length KYBER_POLYCOMPRESSEDBYTES bytes) -**************************************************/ -void poly_decompress(poly *r, const unsigned char *a) -{ - int i; -#if (KYBER_POLYCOMPRESSEDBYTES == 96) - for(i=0;icoeffs[i+0] = (((a[0] & 7) * KYBER_Q) + 4) >> 3; - r->coeffs[i+1] = ((((a[0] >> 3) & 7) * KYBER_Q) + 4) >> 3; - r->coeffs[i+2] = ((((a[0] >> 6) | ((a[1] << 2) & 4)) * KYBER_Q) + 4) >> 3; - r->coeffs[i+3] = ((((a[1] >> 1) & 7) * KYBER_Q) + 4) >> 3; - r->coeffs[i+4] = ((((a[1] >> 4) & 7) * KYBER_Q) + 4) >> 3; - r->coeffs[i+5] = ((((a[1] >> 7) | ((a[2] << 1) & 6)) * KYBER_Q) + 4) >> 3; - r->coeffs[i+6] = ((((a[2] >> 2) & 7) * KYBER_Q) + 4) >> 3; - r->coeffs[i+7] = ((((a[2] >> 5)) * KYBER_Q) + 4) >> 3; - a += 3; - } -#elif (KYBER_POLYCOMPRESSEDBYTES == 128) - for(i=0;icoeffs[i+0] = (((a[0] & 15) * KYBER_Q) + 8) >> 4; - r->coeffs[i+1] = (((a[0] >> 4) * KYBER_Q) + 8) >> 4; - r->coeffs[i+2] = (((a[1] & 15) * KYBER_Q) + 8) >> 4; - r->coeffs[i+3] = (((a[1] >> 4) * KYBER_Q) + 8) >> 4; - r->coeffs[i+4] = (((a[2] & 15) * KYBER_Q) + 8) >> 4; - r->coeffs[i+5] = (((a[2] >> 4) * KYBER_Q) + 8) >> 4; - r->coeffs[i+6] = (((a[3] & 15) * KYBER_Q) + 8) >> 4; - r->coeffs[i+7] = (((a[3] >> 4) * KYBER_Q) + 8) >> 4; - a += 4; - } -#elif (KYBER_POLYCOMPRESSEDBYTES == 160) - for(i=0;icoeffs[i+0] = (((a[0] & 31) * KYBER_Q) + 16) >> 5; - r->coeffs[i+1] = ((((a[0] >> 5) | ((a[1] & 3) << 3)) * KYBER_Q) + 16) >> 5; - r->coeffs[i+2] = ((((a[1] >> 2) & 31) * KYBER_Q) + 16) >> 5; - r->coeffs[i+3] = ((((a[1] >> 7) | ((a[2] & 15) << 1)) * KYBER_Q) + 16) >> 5; - r->coeffs[i+4] = ((((a[2] >> 4) | ((a[3] & 1) << 4)) * KYBER_Q) + 16) >> 5; - r->coeffs[i+5] = ((((a[3] >> 1) & 31) * KYBER_Q) + 16) >> 5; - r->coeffs[i+6] = ((((a[3] >> 6) | ((a[4] & 7) << 2)) * KYBER_Q) + 16) >> 5; - r->coeffs[i+7] = (((a[4] >> 3) * KYBER_Q) + 16) >> 5; - a += 5; - } -#else -#error "KYBER_POLYCOMPRESSEDBYTES needs to be in {96, 128, 160}" -#endif -} - -/************************************************* -* Name: poly_tobytes -* -* Description: Serialization of a polynomial -* -* Arguments: - unsigned char *r: pointer to output byte array (needs space for KYBER_POLYBYTES bytes) -* - const poly *a: pointer to input polynomial -**************************************************/ -void poly_tobytes(unsigned char *r, poly *a) -{ - int i; - uint16_t t0, t1; - - poly_csubq(a); - - for(i=0;icoeffs[2*i]; - t1 = a->coeffs[2*i+1]; - r[3*i] = t0 & 0xff; - r[3*i+1] = (t0 >> 8) | ((t1 & 0xf) << 4); - r[3*i+2] = t1 >> 4; - } -} - -/************************************************* -* Name: poly_frombytes -* -* Description: De-serialization of a polynomial; -* inverse of poly_tobytes -* -* Arguments: - poly *r: pointer to output polynomial -* - const unsigned char *a: pointer to input byte array (of KYBER_POLYBYTES bytes) -**************************************************/ -void poly_frombytes(poly *r, const unsigned char *a) -{ - int i; - - for(i=0;icoeffs[2*i] = a[3*i] | ((uint16_t)a[3*i+1] & 0x0f) << 8; - r->coeffs[2*i+1] = a[3*i+1] >> 4 | ((uint16_t)a[3*i+2] & 0xff) << 4; - } -} - -/************************************************* -* Name: poly_getnoise -* -* Description: Sample a polynomial deterministically from a seed and a nonce, -* with output polynomial close to centered binomial distribution -* with parameter KYBER_ETA -* -* Arguments: - poly *r: pointer to output polynomial -* - const unsigned char *seed: pointer to input seed (pointing to array of length KYBER_SYMBYTES bytes) -* - unsigned char nonce: one-byte input nonce -**************************************************/ -void poly_getnoise(poly *r, const unsigned char *seed, unsigned char nonce) -{ - unsigned char buf[KYBER_ETA*KYBER_N/4]; - - prf(buf, KYBER_ETA*KYBER_N/4, seed, nonce); - cbd(r, buf); -} - -/************************************************* -* Name: poly_ntt -* -* Description: Computes negacyclic number-theoretic transform (NTT) of -* a polynomial in place; -* inputs assumed to be in normal order, output in bitreversed order -* -* Arguments: - uint16_t *r: pointer to in/output polynomial -**************************************************/ -void poly_ntt(poly *r) -{ - ntt(r->coeffs); - poly_reduce(r); -} - -/************************************************* -* Name: poly_invntt -* -* Description: Computes inverse of negacyclic number-theoretic transform (NTT) of -* a polynomial in place; -* inputs assumed to be in bitreversed order, output in normal order -* -* Arguments: - uint16_t *a: pointer to in/output polynomial -**************************************************/ -void poly_invntt(poly *r) -{ - invntt(r->coeffs); -} - -/************************************************* -* Name: poly_basemul -* -* Description: Multiplication of two polynomials in NTT domain -* -* Arguments: - poly *r: pointer to output polynomial -* - const poly *a: pointer to first input polynomial -* - const poly *b: pointer to second input polynomial -**************************************************/ -void poly_basemul(poly *r, const poly *a, const poly *b) -{ - unsigned int i; - - for(i = 0; i < KYBER_N/4; ++i) { - basemul(r->coeffs + 4*i, a->coeffs + 4*i, b->coeffs + 4*i, zetas[64 + i]); - basemul(r->coeffs + 4*i + 2, a->coeffs + 4*i + 2, b->coeffs + 4*i + 2, -zetas[64 + i]); - } -} - -/************************************************* -* Name: poly_frommont -* -* Description: Inplace conversion of all coefficients of a polynomial -* from Montgomery domain to normal domain -* -* Arguments: - poly *r: pointer to input/output polynomial -**************************************************/ -void poly_frommont(poly *r) -{ - int i; - const int16_t f = (1ULL << 32) % KYBER_Q; - - for(i=0;icoeffs[i] = montgomery_reduce((int32_t)r->coeffs[i]*f); -} - -/************************************************* -* Name: poly_reduce -* -* Description: Applies Barrett reduction to all coefficients of a polynomial -* for details of the Barrett reduction see comments in reduce.c -* -* Arguments: - poly *r: pointer to input/output polynomial -**************************************************/ -void poly_reduce(poly *r) -{ - int i; - - for(i=0;icoeffs[i] = barrett_reduce(r->coeffs[i]); -} - -/************************************************* -* Name: poly_csubq -* -* Description: Applies conditional subtraction of q to each coefficient of a polynomial -* for details of conditional subtraction of q see comments in reduce.c -* -* Arguments: - poly *r: pointer to input/output polynomial -**************************************************/ -void poly_csubq(poly *r) -{ - int i; - - for(i=0;icoeffs[i] = csubq(r->coeffs[i]); -} - -/************************************************* -* Name: poly_add -* -* Description: Add two polynomials -* -* Arguments: - poly *r: pointer to output polynomial -* - const poly *a: pointer to first input polynomial -* - const poly *b: pointer to second input polynomial -**************************************************/ -void poly_add(poly *r, const poly *a, const poly *b) -{ - int i; - for(i=0;icoeffs[i] = a->coeffs[i] + b->coeffs[i]; -} - -/************************************************* -* Name: poly_sub -* -* Description: Subtract two polynomials -* -* Arguments: - poly *r: pointer to output polynomial -* - const poly *a: pointer to first input polynomial -* - const poly *b: pointer to second input polynomial -**************************************************/ -void poly_sub(poly *r, const poly *a, const poly *b) -{ - int i; - for(i=0;icoeffs[i] = a->coeffs[i] - b->coeffs[i]; -} - -/************************************************* -* Name: poly_frommsg -* -* Description: Convert 32-byte message to polynomial -* -* Arguments: - poly *r: pointer to output polynomial -* - const unsigned char *msg: pointer to input message -**************************************************/ -void poly_frommsg(poly *r, const unsigned char msg[KYBER_SYMBYTES]) -{ - int i,j; - uint16_t mask; - - for(i=0;i> j)&1); - r->coeffs[8*i+j] = mask & ((KYBER_Q+1)/2); - } - } -} - -/************************************************* -* Name: poly_tomsg -* -* Description: Convert polynomial to 32-byte message -* -* Arguments: - unsigned char *msg: pointer to output message -* - const poly *a: pointer to input polynomial -**************************************************/ -void poly_tomsg(unsigned char msg[KYBER_SYMBYTES], poly *a) -{ - uint16_t t; - int i,j; - - poly_csubq(a); - - for(i=0;icoeffs[8*i+j] << 1) + KYBER_Q/2) / KYBER_Q) & 1; - msg[i] |= t << j; - } - } -} diff --git a/code/jasmin/ref/poly.h b/code/jasmin/ref/poly.h deleted file mode 100644 index 2d097985..00000000 --- a/code/jasmin/ref/poly.h +++ /dev/null @@ -1,66 +0,0 @@ -#ifndef POLY_H -#define POLY_H - -#include -#include "params.h" - -/* - * Elements of R_q = Z_q[X]/(X^n + 1). Represents polynomial - * coeffs[0] + X*coeffs[1] + X^2*xoeffs[2] + ... + X^{n-1}*coeffs[n-1] - */ -typedef struct{ - int16_t coeffs[KYBER_N]; -} poly; - -void poly_compress(unsigned char *r, poly *a); -void poly_decompress(poly *r, const unsigned char *a); - -void poly_tobytes(unsigned char *r, poly *a); -void poly_frombytes(poly *r, const unsigned char *a); - -void poly_frommsg(poly *r, const unsigned char msg[KYBER_SYMBYTES]); -void poly_tomsg(unsigned char msg[KYBER_SYMBYTES], poly *r); - -void poly_getnoise(poly *r,const unsigned char *seed, unsigned char nonce); - -void poly_ntt(poly *r); -void poly_invntt(poly *r); -void poly_basemul(poly *r, const poly *a, const poly *b); -void poly_frommont(poly *r); - -void poly_reduce(poly *r); -void poly_csubq(poly *r); - -void poly_add(poly *r, const poly *a, const poly *b); -void poly_sub(poly *r, const poly *a, const poly *b); - - - - - - -void poly_compress_jazz(unsigned char *r, poly *a); -void poly_decompress_jazz(poly *r, const unsigned char *a); - -void poly_tobytes_jazz(unsigned char *r, poly *a); -void poly_frombytes_jazz(poly *r, const unsigned char *a); - -void poly_frommsg_jazz(poly *r, const unsigned char msg[KYBER_SYMBYTES]); -void poly_tomsg_jazz(unsigned char msg[KYBER_SYMBYTES], poly *r); - - -void poly_getnoise_jazz(poly *r,const unsigned char *seed, unsigned char nonce); - -void poly_ntt_jazz(poly *r); -void poly_invntt_jazz(poly *r); -void poly_basemul_jazz(poly *r, const poly *a, const poly *b); -void poly_frommont_jazz(poly *r); - -void poly_reduce_jazz(poly *r); -void poly_csubq_jazz(poly *r); - -void poly_add2_jazz(poly *r, const poly *b); -void poly_sub_jazz(poly *r, const poly *a, const poly *b); - - -#endif diff --git a/code/jasmin/ref/poly.jinc b/code/jasmin/ref/poly.jinc deleted file mode 100644 index 18a51d53..00000000 --- a/code/jasmin/ref/poly.jinc +++ /dev/null @@ -1,700 +0,0 @@ -require "params.jinc" -require "reduce.jinc" -require "fips202.jinc" -require "zetas.jinc" - -fn _poly_add2(reg ptr u16[KYBER_N] rp bp) -> stack u16[KYBER_N] -{ - reg u16 a; - reg u16 b; - reg u16 r; - reg u64 i; - - i = 0; - - while (i < KYBER_N) { - a = rp[(int)i]; - b = bp[(int)i]; - r = a + b; - rp[(int)i] = r; - i += 1; - } - return rp; -} - -fn _poly_csubq(reg ptr u16[KYBER_N] rp) -> reg ptr u16[KYBER_N] -{ - reg u64 i; - reg u16 t; - reg u16 b; - - i = 0; - while (i < KYBER_N) - { - t = rp[(int)i]; - t -= KYBER_Q; - b = t; - b >>s= 15; - b &= KYBER_Q; - t += b; - rp[(int)i] = t; - i += 1; - } - return rp; -} - -fn _poly_basemul(reg ptr u16[KYBER_N] rp, reg const ptr u16[KYBER_N] ap bp) -> reg ptr u16[KYBER_N] -{ - reg u64 offset; - reg u16 zeta; - reg u16 r0; - reg u16 r1; - reg u16 a0; - reg u16 a1; - reg u16 b0; - reg u16 b1; - reg u16 t; - reg ptr u16[128] zetasp; - reg u64 zetasctr; - reg u64 i; - - stack ptr u16[KYBER_N] srp; - - srp = rp; - - zetasctr = 64; - i = 0; - - while(i < KYBER_N) - { - zetasp = jzetas; - zeta = zetasp[(int)zetasctr]; - zetasctr += 1; - - a0 = ap[(int)i]; - b0 = bp[(int)i]; - i += 1; - a1 = ap[(int)i]; - b1 = bp[(int)i]; - i -= 1; - - r0 = __fqmul(a1, b1); - r0 = __fqmul(r0, zeta); - t = __fqmul(a0, b0); - r0 += t; - - r1 = __fqmul(a0, b1); - t = __fqmul(a1, b0); - r1 += t; - - rp = srp; - rp[(int)i] = r0; - i += 1; - rp[(int)i] = r1; - srp = rp; - - - zeta = -zeta; - - i += 1; - a0 = ap[(int)i]; - b0 = bp[(int)i]; - i += 1; - a1 = ap[(int)i]; - b1 = bp[(int)i]; - i -= 1; - - r0 = __fqmul(a1, b1); - r0 = __fqmul(r0, zeta); - t = __fqmul(a0, b0); - r0 += t; - - r1 = __fqmul(a0, b1); - t = __fqmul(a1, b0); - r1 += t; - - rp = srp; - rp[(int)i] = r0; - i += 1; - rp[(int)i] = r1; - srp = rp; - - i += 1; - } - return rp; -} - -inline -fn __poly_reduce(reg ptr u16[KYBER_N] rp) -> reg ptr u16[KYBER_N] -{ - reg u64 j; - reg u16 t; - j = 0; - while (j < KYBER_N) - { - t = rp[(int)j]; - t = __barrett_reduce(t); - rp[(int)j] = t; - j += 1; - } - return rp; -} - -fn _poly_compress(reg u64 rp, reg ptr u16[KYBER_N] a) -> reg ptr u16[KYBER_N] -{ - reg u16 t; - reg u32 d0, d1; - reg u64 i j; - - a = _poly_csubq(a); - - i = 0; - j = 0; - while(i < 128) - { - t = a[(int)j]; - d0 = (32u)t; - d0 <<= 4; - d0 += 1665; - d0 *= 80635; - d0 >>= 28; - d0 &= 0xf; - j += 1; - t = a[(int)j]; - d1 = (32u)t; - d1 <<= 4; - d1 += 1665; - d1 *= 80635; - d1 >>= 28; - d1 &= 0xf; - d1 <<= 4; - d0 |= d1; - (u8)[rp+i] = d0; - i += 1; - j += 1; - } - return a; -} - -fn _i_poly_compress(reg ptr u8[KYBER_POLYCOMPRESSEDBYTES] rp, reg ptr u16[KYBER_N] a) -> reg ptr u8[KYBER_POLYCOMPRESSEDBYTES], reg ptr u16[KYBER_N] -{ - reg u16 t; - reg u32 d0, d1; - reg u64 i j; - - a = _poly_csubq(a); - - i = 0; - j = 0; - while(i < 128) - { - t = a[(int)j]; - d0 = (32u)t; - d0 <<= 4; - d0 += 1665; - d0 *= 80635; - d0 >>= 28; - d0 &= 0xf; - j += 1; - t = a[(int)j]; - d1 = (32u)t; - d1 <<= 4; - d1 += 1665; - d1 *= 80635; - d1 >>= 28; - d1 &= 0xf; - d1 <<= 4; - d0 |= d1; - rp[(int) i] = d0; - i += 1; - j += 1; - } - return rp, a; -} - - -fn _poly_decompress(reg ptr u16[KYBER_N] rp, reg u64 ap) -> stack u16[KYBER_N] -{ - reg u8 t; - reg u16 d0, d1; - reg u64 i j; - - i = 0; - j = 0; - - while (i < 128) { - t = (u8)[ap+i]; - d0 = (16u)t; - d1 = (16u)t; - d0 &= 0xf; - d1 >>= 4; - d0 *= KYBER_Q; - d1 *= KYBER_Q; - d0 += 8; - d1 += 8; - d0 >>= 4; - d1 >>= 4; - rp[(int)j] = d0; - j += 1; - rp[(int)j] = d1; - j += 1; - i += 1; - } - return rp; -} - -fn _poly_frombytes(reg ptr u16[KYBER_N] rp, reg u64 ap) -> reg ptr u16[KYBER_N] -{ - reg u8 c0, c1, c2; - reg u16 d0, d1, t; - inline int i; - - for i = 0 to KYBER_N/2 - { - c0 = (u8)[ap+3*i]; - c1 = (u8)[ap+3*i+1]; - c2 = (u8)[ap+3*i+2]; - d0 = (16u)c0; - t = (16u)c1; - t &= 0xf; - t <<= 8; - d0 |= t; - d1 = (16u)c2; - d1 <<= 4; - t = (16u)c1; - t >>= 4; - d1 |= t; - rp[2*i] = d0; - rp[2*i+1] = d1; - } - return rp; -} - -param int DMONT = 1353; /* (1ULL << 32) % KYBER_Q */ - -fn _poly_frommont(reg ptr u16[KYBER_N] rp) -> reg ptr u16[KYBER_N] -{ - reg u64 i; - reg u16 r; - reg u16 dmont; - - dmont = DMONT; - - i = 0; - while (i < KYBER_N) - { - r = rp[(int)i]; - r = __fqmul(r, dmont); - rp[(int)i] = r; - i += 1; - } - return rp; -} - -fn _poly_frommsg(reg ptr u16[KYBER_N] rp, reg u64 ap) -> stack u16[KYBER_N] -{ - reg u8 c; - reg u16 t; - inline int i; - inline int j; - - for i = 0 to 32 - { - c = (u8)[ap + i]; - - t = (16u)c; - t &= 1; - t *= (KYBER_Q+1)/2; - rp[8*i] = t; - c >>= 1; - - t = (16u)c; - t &= 1; - t *= (KYBER_Q+1)/2; - rp[8*i+1] = t; - c >>= 1; - - t = (16u)c; - t &= 1; - t *= (KYBER_Q+1)/2; - rp[8*i+2] = t; - c >>= 1; - - t = (16u)c; - t &= 1; - t *= (KYBER_Q+1)/2; - rp[8*i+3] = t; - c >>= 1; - - t = (16u)c; - t &= 1; - t *= (KYBER_Q+1)/2; - rp[8*i+4] = t; - c >>= 1; - - t = (16u)c; - t &= 1; - t *= (KYBER_Q+1)/2; - rp[8*i+5] = t; - c >>= 1; - - t = (16u)c; - t &= 1; - t *= (KYBER_Q+1)/2; - rp[8*i+6] = t; - c >>= 1; - - t = (16u)c; - t &= 1; - t *= (KYBER_Q+1)/2; - rp[8*i+7] = t; - c >>= 1; - } - return rp; -} - - -fn _i_poly_frommsg(reg ptr u16[KYBER_N] rp, reg ptr u8[32] ap) -> stack u16[KYBER_N] -{ - reg u8 c; - reg u16 t; - inline int i; - inline int j; - - for i = 0 to 32 - { - c = ap[i]; - - t = (16u)c; - t &= 1; - t *= (KYBER_Q+1)/2; - rp[8*i] = t; - c >>= 1; - - t = (16u)c; - t &= 1; - t *= (KYBER_Q+1)/2; - rp[8*i+1] = t; - c >>= 1; - - t = (16u)c; - t &= 1; - t *= (KYBER_Q+1)/2; - rp[8*i+2] = t; - c >>= 1; - - t = (16u)c; - t &= 1; - t *= (KYBER_Q+1)/2; - rp[8*i+3] = t; - c >>= 1; - - t = (16u)c; - t &= 1; - t *= (KYBER_Q+1)/2; - rp[8*i+4] = t; - c >>= 1; - - t = (16u)c; - t &= 1; - t *= (KYBER_Q+1)/2; - rp[8*i+5] = t; - c >>= 1; - - t = (16u)c; - t &= 1; - t *= (KYBER_Q+1)/2; - rp[8*i+6] = t; - c >>= 1; - - t = (16u)c; - t &= 1; - t *= (KYBER_Q+1)/2; - rp[8*i+7] = t; - c >>= 1; - } - return rp; -} - -fn _poly_getnoise(reg ptr u16[KYBER_N] rp, reg ptr u8[KYBER_SYMBYTES] seed, reg u8 nonce) -> reg ptr u16[KYBER_N] -{ - stack u8[33] extseed; /* 33 = KYBER_SYMBYTES +1 */ - stack u8[128] buf; /* 128 = KYBER_ETA*KYBER_N/4 */ - reg u64 outlen; - reg u8 c,a,b; - reg u16 t; - reg u64 i j; - inline int k; - - stack ptr u16[KYBER_N] srp; - - srp = rp; - - for k = 0 to KYBER_SYMBYTES - { - c = seed[k]; - extseed[k] = c; - } - extseed[KYBER_SYMBYTES] = nonce; - - buf = _shake256_128_33(buf, extseed); - - rp = srp; - - i = 0; - j = 0; - while (i < 128) { - c = buf[(int)i]; - a = c; - a &= 0x55; - - c >>= 1; - c &= 0x55; - c += a; - - a = c; - a &= 0x3; - b = c; - b >>= 2; - b &= 0x3; - a -= b; - t = (16s)a; - rp[(int)j] = t; - a = c; - a >>= 4; - a &= 0x3; - b = c >> 6; - b &= 0x3; - a -= b; - t = (16s)a; - j += 1; - rp[(int)j] = t; - i += 1; - j += 1; - } - - return rp; -} - -fn _poly_invntt(reg ptr u16[KYBER_N] rp) -> reg ptr u16[KYBER_N] -{ - reg u64 len; - reg u64 start; - reg u64 j; - reg u64 cmp; - reg u64 offset; - - reg u16 zeta; - reg u16 t; - reg u16 s; - reg u16 m; - - reg ptr u16[128] zetasp; - reg u64 zetasctr; - - zetasp = jzetas_inv; - zetasctr = 0; - - len = 2; - while (len <= 128) - { - start = 0; - while (start < 256) - { - zeta = zetasp[(int)zetasctr]; - zetasctr += 1; - - j = start; - cmp = start + len; - while (j < cmp) - { - offset = j + len; - s = rp[(int)offset]; - t = rp[(int)j]; - m = s + t; - m = __barrett_reduce(m); - rp[(int)j] = m; - t -= s; - t = __fqmul(t, zeta); - rp[(int)offset] = t; - j += 1; - } - start = j + len; - } - len <<= 1; - } - - zeta = zetasp[127]; - j = 0; - while (j < KYBER_N) - { - t = rp[(int)j]; - t = __fqmul(t, zeta); - rp[(int)j] = t; - j += 1; - } - return rp; -} - -fn _poly_ntt(reg ptr u16[KYBER_N] rp) -> reg ptr u16[KYBER_N] -{ - reg u64 len; - reg u64 start; - reg u64 j; - reg u64 cmp; - reg u64 offset; - - reg u16 zeta; - reg u16 t; - reg u16 s; - reg u16 m; - - reg ptr u16[128] zetasp; - reg u64 zetasctr; - - zetasp = jzetas; - zetasctr = 0; - len = 128; - while (len >= 2) - { - start = 0; - while (start < 256) - { - zetasctr += 1; - zeta = zetasp[(int)zetasctr]; - j = start; - cmp = start + len; - while (j < cmp) - { - offset = j + len; - t = rp[(int)offset]; - t = __fqmul(t, zeta); - s = rp[(int)j]; - m = s; - m -= t; - rp[(int)offset] = m; - t += s; - rp[(int)j] = t; - j += 1; - } - start = j + len; - } - len >>= 1; - } - - rp = __poly_reduce(rp); - - return rp; -} - -fn _poly_sub(reg ptr u16[KYBER_N] rp ap bp) -> reg ptr u16[KYBER_N] -{ - reg u16 a; - reg u16 b; - reg u16 r; - reg u64 i; - - i = 0; - while (i < KYBER_N) { - a = ap[(int)i]; - b = bp[(int)i]; - r = a - b; - rp[(int)i] = r; - i += 1; - } - return rp; -} - -fn _poly_tobytes(reg u64 rp, reg ptr u16[KYBER_N] a) -> reg ptr u16[KYBER_N] -{ - reg u16 t0, t1, d; - reg u64 i j; - - a = _poly_csubq(a); - - i = 0; - j = 0; - while (i < KYBER_N) - { - t0 = a[(int)i]; - i += 1; - t1 = a[(int)i]; - i += 1; - d = t0; - d &= 0xff; - (u8)[rp+j] = d; - j += 1; - t0 >>= 8; - d = t1; - d &= 0xf; - d <<= 4; - d |= t0; - (u8)[rp+j] = d; - j += 1; - t1 >>= 4; - (u8)[rp+j] = t1; - j += 1; - } - return a; -} - -fn _poly_tomsg(reg u64 rp, reg ptr u16[KYBER_N] a) -> reg ptr u16[KYBER_N] -{ - reg u16 t; - reg u8 r; - reg u32 d; - inline int i j; - - a = _poly_csubq(a); - - for i = 0 to 32 - { - r = 0; - for j = 0 to 8 - { - t = a[8*i+j]; - d = (32u)t; - d <<= 1; - d += 1665; - d *= 80635; - d >>= 28; - d &= 1; - d <<= j; - r |= d; - } - - (u8)[rp+i] = r; - } - return a; -} - -fn _i_poly_tomsg(reg ptr u8[KYBER_N/8] rp, reg ptr u16[KYBER_N] a) -> reg ptr u8[KYBER_N/8], reg ptr u16[KYBER_N] -{ - reg u16 t; - reg u8 r; - reg u32 d; - inline int i j; - - a = _poly_csubq(a); - - for i = 0 to 32 - { - r = 0; - for j = 0 to 8 - { - t = a[8*i+j]; - d = (32u)t; - d <<= 1; - d += 1665; - d *= 80635; - d >>= 28; - d &= 1; - d <<= j; - r |= d; - } - - rp[i] = r; - } - - return rp, a; -} diff --git a/code/jasmin/ref/poly_ntt.c b/code/jasmin/ref/poly_ntt.c deleted file mode 100644 index 83341812..00000000 --- a/code/jasmin/ref/poly_ntt.c +++ /dev/null @@ -1,10 +0,0 @@ -#include "poly.h" -#include "ntt.h" - -void poly_ntt_jazz(poly *r, int16_t *zetas) -{ - ntt(r->coeffs); - poly_reduce(r); -} - - diff --git a/code/jasmin/ref/polyvec.c b/code/jasmin/ref/polyvec.c deleted file mode 100644 index 316543a0..00000000 --- a/code/jasmin/ref/polyvec.c +++ /dev/null @@ -1,237 +0,0 @@ -#include -#include "polyvec.h" -#include "poly.h" - -/************************************************* -* Name: polyvec_compress -* -* Description: Compress and serialize vector of polynomials -* -* Arguments: - unsigned char *r: pointer to output byte array (needs space for KYBER_POLYVECCOMPRESSEDBYTES) -* - const polyvec *a: pointer to input vector of polynomials -**************************************************/ -void polyvec_compress(unsigned char *r, polyvec *a) -{ - int i,j,k; - - polyvec_csubq(a); - -#if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352)) - uint16_t t[8]; - for(i=0;ivec[i].coeffs[8*j+k] << 11) + KYBER_Q/2) / KYBER_Q) & 0x7ff; - - r[11*j+ 0] = t[0] & 0xff; - r[11*j+ 1] = (t[0] >> 8) | ((t[1] & 0x1f) << 3); - r[11*j+ 2] = (t[1] >> 5) | ((t[2] & 0x03) << 6); - r[11*j+ 3] = (t[2] >> 2) & 0xff; - r[11*j+ 4] = (t[2] >> 10) | ((t[3] & 0x7f) << 1); - r[11*j+ 5] = (t[3] >> 7) | ((t[4] & 0x0f) << 4); - r[11*j+ 6] = (t[4] >> 4) | ((t[5] & 0x01) << 7); - r[11*j+ 7] = (t[5] >> 1) & 0xff; - r[11*j+ 8] = (t[5] >> 9) | ((t[6] & 0x3f) << 2); - r[11*j+ 9] = (t[6] >> 6) | ((t[7] & 0x07) << 5); - r[11*j+10] = (t[7] >> 3); - } - r += 352; - } -#elif (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 320)) - uint16_t t[4]; - for(i=0;ivec[i].coeffs[4*j+k] << 10) + KYBER_Q/2) / KYBER_Q) & 0x3ff; - - r[5*j+ 0] = t[0] & 0xff; - r[5*j+ 1] = (t[0] >> 8) | ((t[1] & 0x3f) << 2); - r[5*j+ 2] = (t[1] >> 6) | ((t[2] & 0x0f) << 4); - r[5*j+ 3] = (t[2] >> 4) | ((t[3] & 0x03) << 6); - r[5*j+ 4] = (t[3] >> 2); - } - r += 320; - } -#else -#error "KYBER_POLYVECCOMPRESSEDBYTES needs to be in {320*KYBER_K, 352*KYBER_K}" -#endif -} - -/************************************************* -* Name: polyvec_decompress -* -* Description: De-serialize and decompress vector of polynomials; -* approximate inverse of polyvec_compress -* -* Arguments: - polyvec *r: pointer to output vector of polynomials -* - unsigned char *a: pointer to input byte array (of length KYBER_POLYVECCOMPRESSEDBYTES) -**************************************************/ -void polyvec_decompress(polyvec *r, const unsigned char *a) -{ - int i,j; -#if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352)) - for(i=0;ivec[i].coeffs[8*j+0] = (((a[11*j+ 0] | (((uint32_t)a[11*j+ 1] & 0x07) << 8)) * KYBER_Q) + 1024) >> 11; - r->vec[i].coeffs[8*j+1] = ((((a[11*j+ 1] >> 3) | (((uint32_t)a[11*j+ 2] & 0x3f) << 5)) * KYBER_Q) + 1024) >> 11; - r->vec[i].coeffs[8*j+2] = ((((a[11*j+ 2] >> 6) | (((uint32_t)a[11*j+ 3] & 0xff) << 2) | (((uint32_t)a[11*j+ 4] & 0x01) << 10)) * KYBER_Q) + 1024) >> 11; - r->vec[i].coeffs[8*j+3] = ((((a[11*j+ 4] >> 1) | (((uint32_t)a[11*j+ 5] & 0x0f) << 7)) * KYBER_Q) + 1024) >> 11; - r->vec[i].coeffs[8*j+4] = ((((a[11*j+ 5] >> 4) | (((uint32_t)a[11*j+ 6] & 0x7f) << 4)) * KYBER_Q) + 1024) >> 11; - r->vec[i].coeffs[8*j+5] = ((((a[11*j+ 6] >> 7) | (((uint32_t)a[11*j+ 7] & 0xff) << 1) | (((uint32_t)a[11*j+ 8] & 0x03) << 9)) * KYBER_Q) + 1024) >> 11; - r->vec[i].coeffs[8*j+6] = ((((a[11*j+ 8] >> 2) | (((uint32_t)a[11*j+ 9] & 0x1f) << 6)) * KYBER_Q) + 1024) >> 11; - r->vec[i].coeffs[8*j+7] = ((((a[11*j+ 9] >> 5) | (((uint32_t)a[11*j+10] & 0xff) << 3)) * KYBER_Q) + 1024) >> 11; - } - a += 352; - } -#elif (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 320)) - for(i=0;ivec[i].coeffs[4*j+0] = (((a[5*j+ 0] | (((uint32_t)a[5*j+ 1] & 0x03) << 8)) * KYBER_Q) + 512) >> 10; - r->vec[i].coeffs[4*j+1] = ((((a[5*j+ 1] >> 2) | (((uint32_t)a[5*j+ 2] & 0x0f) << 6)) * KYBER_Q) + 512) >> 10; - r->vec[i].coeffs[4*j+2] = ((((a[5*j+ 2] >> 4) | (((uint32_t)a[5*j+ 3] & 0x3f) << 4)) * KYBER_Q) + 512) >> 10; - r->vec[i].coeffs[4*j+3] = ((((a[5*j+ 3] >> 6) | (((uint32_t)a[5*j+ 4] & 0xff) << 2)) * KYBER_Q) + 512) >> 10; - } - a += 320; - } -#else -#error "KYBER_POLYVECCOMPRESSEDBYTES needs to be in {320*KYBER_K, 352*KYBER_K}" -#endif -} - -/************************************************* -* Name: polyvec_tobytes -* -* Description: Serialize vector of polynomials -* -* Arguments: - unsigned char *r: pointer to output byte array (needs space for KYBER_POLYVECBYTES) -* - const polyvec *a: pointer to input vector of polynomials -**************************************************/ -void polyvec_tobytes(unsigned char *r, polyvec *a) -{ - int i; - for(i=0;ivec[i]); -} - -/************************************************* -* Name: polyvec_frombytes -* -* Description: De-serialize vector of polynomials; -* inverse of polyvec_tobytes -* -* Arguments: - unsigned char *r: pointer to output byte array -* - const polyvec *a: pointer to input vector of polynomials (of length KYBER_POLYVECBYTES) -**************************************************/ -void polyvec_frombytes(polyvec *r, const unsigned char *a) -{ - int i; - for(i=0;ivec[i], a+i*KYBER_POLYBYTES); -} - -/************************************************* -* Name: polyvec_ntt -* -* Description: Apply forward NTT to all elements of a vector of polynomials -* -* Arguments: - polyvec *r: pointer to in/output vector of polynomials -**************************************************/ -void polyvec_ntt(polyvec *r) -{ - int i; - for(i=0;ivec[i]); -} - -/************************************************* -* Name: polyvec_invntt -* -* Description: Apply inverse NTT to all elements of a vector of polynomials -* -* Arguments: - polyvec *r: pointer to in/output vector of polynomials -**************************************************/ -void polyvec_invntt(polyvec *r) -{ - int i; - for(i=0;ivec[i]); -} - -/************************************************* -* Name: polyvec_pointwise_acc -* -* Description: Pointwise multiply elements of a and b and accumulate into r -* -* Arguments: - poly *r: pointer to output polynomial -* - const polyvec *a: pointer to first input vector of polynomials -* - const polyvec *b: pointer to second input vector of polynomials -**************************************************/ -void polyvec_pointwise_acc(poly *r, const polyvec *a, const polyvec *b) -{ - int i; - poly t; - - poly_basemul(r, &a->vec[0], &b->vec[0]); - for(i=1;ivec[i], &b->vec[i]); - poly_add(r, r, &t); - } - - poly_reduce(r); -} - -/************************************************* -* Name: polyvec_reduce -* -* Description: Applies Barrett reduction to each coefficient -* of each element of a vector of polynomials -* for details of the Barrett reduction see comments in reduce.c -* -* Arguments: - poly *r: pointer to input/output polynomial -**************************************************/ -void polyvec_reduce(polyvec *r) -{ - int i; - for(i=0;ivec[i]); -} - -/************************************************* -* Name: polyvec_csubq -* -* Description: Applies conditional subtraction of q to each coefficient -* of each element of a vector of polynomials -* for details of conditional subtraction of q see comments in reduce.c -* -* Arguments: - poly *r: pointer to input/output polynomial -**************************************************/ -void polyvec_csubq(polyvec *r) -{ - int i; - for(i=0;ivec[i]); -} - -/************************************************* -* Name: polyvec_add -* -* Description: Add vectors of polynomials -* -* Arguments: - polyvec *r: pointer to output vector of polynomials -* - const polyvec *a: pointer to first input vector of polynomials -* - const polyvec *b: pointer to second input vector of polynomials -**************************************************/ -void polyvec_add(polyvec *r, const polyvec *a, const polyvec *b) -{ - int i; - for(i=0;ivec[i], &a->vec[i], &b->vec[i]); -} diff --git a/code/jasmin/ref/polyvec.h b/code/jasmin/ref/polyvec.h deleted file mode 100644 index 9fbdb673..00000000 --- a/code/jasmin/ref/polyvec.h +++ /dev/null @@ -1,47 +0,0 @@ -#ifndef POLYVEC_H -#define POLYVEC_H - -#include "params.h" -#include "poly.h" - -typedef struct{ - poly vec[KYBER_K]; -} polyvec; - -void polyvec_compress(unsigned char *r, polyvec *a); -void polyvec_decompress(polyvec *r, const unsigned char *a); - -void polyvec_tobytes(unsigned char *r, polyvec *a); -void polyvec_frombytes(polyvec *r, const unsigned char *a); - -void polyvec_ntt(polyvec *r); -void polyvec_invntt(polyvec *r); - -void polyvec_pointwise_acc(poly *r, const polyvec *a, const polyvec *b); - -void polyvec_reduce(polyvec *r); -void polyvec_csubq(polyvec *r); - -void polyvec_add(polyvec *r, const polyvec *a, const polyvec *b); - - - - -void polyvec_compress_jazz(unsigned char *r, polyvec *a); -void polyvec_decompress_jazz(polyvec *r, const unsigned char *a); - -void polyvec_tobytes_jazz(unsigned char *r, polyvec *a); -void polyvec_frombytes_jazz(polyvec *r, const unsigned char *a); - -void polyvec_ntt_jazz(polyvec *r); -void polyvec_invntt_jazz(polyvec *r); - -void polyvec_pointwise_acc_jazz(poly *r, const polyvec *a, const polyvec *b); - -void polyvec_reduce_jazz(polyvec *r); -void polyvec_csubq_jazz(polyvec *r); - -void polyvec_add2_jazz(polyvec *r, const polyvec *b); - - -#endif diff --git a/code/jasmin/ref/polyvec.jinc b/code/jasmin/ref/polyvec.jinc deleted file mode 100644 index 52438b3f..00000000 --- a/code/jasmin/ref/polyvec.jinc +++ /dev/null @@ -1,283 +0,0 @@ -require "params.jinc" -require "poly.jinc" - -inline -fn __polyvec_add2(stack u16[KYBER_VECN] r, stack u16[KYBER_VECN] b) -> stack u16[KYBER_VECN] -{ - r[0:KYBER_N] = _poly_add2(r[0:KYBER_N], b[0:KYBER_N]); - r[KYBER_N:KYBER_N] = _poly_add2(r[KYBER_N:KYBER_N], b[KYBER_N:KYBER_N]); - r[2*KYBER_N:KYBER_N] = _poly_add2(r[2*KYBER_N:KYBER_N], b[2*KYBER_N:KYBER_N]); - - return r; -} - -inline -fn __polyvec_csubq(stack u16[KYBER_VECN] r) -> stack u16[KYBER_VECN] -{ - r[0:KYBER_N] = _poly_csubq(r[0:KYBER_N]); - r[KYBER_N:KYBER_N] = _poly_csubq(r[KYBER_N:KYBER_N]); - r[2*KYBER_N:KYBER_N] = _poly_csubq(r[2*KYBER_N:KYBER_N]); - - return r; -} - -inline -fn __polyvec_compress(reg u64 rp, stack u16[KYBER_VECN] a) -{ - stack u16[KYBER_VECN] aa; - reg u16 c, b; - reg u16 d; - reg u64[4] t; - reg u64 i j; - inline int k; - - i = 0; - j = 0; - - aa = __polyvec_csubq(a); - - while (i < KYBER_VECN) - { - for k = 0 to 4 - { - t[k] = (64u)aa[(int) i]; - i += 1; - t[k] <<= 10; - t[k] += 1665; - t[k] *= 1290167; - t[k] >>= 32; - t[k] &= 0x3ff; - } - - c = t[0]; - c &= 0xff; - (u8)[rp + j] = c; - j += 1; - - b = t[0]; - b >>= 8; - c = t[1]; - c <<= 2; - c |= b; - (u8)[rp + j] = c; - j += 1; - - b = t[1]; - b >>= 6; - c = t[2]; - c <<= 4; - c |= b; - (u8)[rp + j] = c; - j += 1; - - b = t[2]; - b >>= 4; - c = t[3]; - c <<= 6; - c |= b; - (u8)[rp + j] = c; - j += 1; - - - t[3] >>= 2; - (u8)[rp + j] = t[3]; - j += 1; - } -} - -inline -fn __i_polyvec_compress(reg ptr u8[KYBER_POLYVECCOMPRESSEDBYTES] rp, stack u16[KYBER_VECN] a) -> reg ptr u8[KYBER_POLYVECCOMPRESSEDBYTES] -{ - stack u16[KYBER_VECN] aa; - reg u16 c, b; - reg u16 d; - reg u64[4] t; - reg u64 i j; - inline int k; - - i = 0; - j = 0; - - aa = __polyvec_csubq(a); - - while (i < KYBER_VECN) - { - for k = 0 to 4 - { - t[k] = (64u)aa[(int) i]; - i += 1; - t[k] <<= 10; - t[k] += 1665; - t[k] *= 1290167; - t[k] >>= 32; - t[k] &= 0x3ff; - } - - c = t[0]; - c &= 0xff; - rp[(int) j] = c; - j += 1; - - b = t[0]; - b >>= 8; - c = t[1]; - c <<= 2; - c |= b; - rp[(int) j] = c; - j += 1; - - b = t[1]; - b >>= 6; - c = t[2]; - c <<= 4; - c |= b; - rp[(int) j] = c; - j += 1; - - b = t[2]; - b >>= 4; - c = t[3]; - c <<= 6; - c |= b; - rp[(int) j] = c; - j += 1; - - - t[3] >>= 2; - rp[(int) j] = t[3]; - j += 1; - } - - return rp; -} - -inline -fn __polyvec_decompress(reg u64 ap) -> stack u16[KYBER_VECN] -{ - stack u16[KYBER_VECN] r; - reg u32[5] t; - reg u32 d; - reg u64 i j; - inline int k; - - i = 0; - j = 0; - - while (i < KYBER_VECN) - { - for k = 0 to 5 - { - t[k] = (32u)(u8)[ap + j]; - j += 1; - } - - d = t[1]; - t[1] >>= 2; - d &= 0x3; - d <<= 8; - t[0] |= d; - - d = t[2]; - t[2] >>= 4; - d &= 0xf; - d <<= 6; - t[1] |= d; - - d = t[3]; - t[3] >>= 6; - d &= 0x3f; - d <<= 4; - t[2] |= d; - - d = t[4]; - d <<= 2; - t[3] |= d; - - for k = 0 to 4 - { - t[k] *= KYBER_Q; - t[k] += 512; - t[k] >>= 10; - r[(int) i] = t[k]; - i += 1; - } - } - return r; -} - -inline -fn __polyvec_frombytes(reg u64 ap) -> stack u16[KYBER_VECN] -{ - stack u16[KYBER_VECN] r; - reg u64 pp; - - pp = ap; - r[0:KYBER_N] = _poly_frombytes(r[0:KYBER_N], pp); - pp += KYBER_POLYBYTES; - r[KYBER_N:KYBER_N] = _poly_frombytes(r[KYBER_N:KYBER_N], pp); - pp += KYBER_POLYBYTES; - r[2*KYBER_N:KYBER_N] = _poly_frombytes(r[2*KYBER_N:KYBER_N], pp); - - return r; -} - -inline -fn __polyvec_invntt(stack u16[KYBER_VECN] r) -> stack u16[KYBER_VECN] -{ - r[0:KYBER_N] = _poly_invntt(r[0:KYBER_N]); - r[KYBER_N:KYBER_N] = _poly_invntt(r[KYBER_N:KYBER_N]); - r[2*KYBER_N:KYBER_N] = _poly_invntt(r[2*KYBER_N:KYBER_N]); - - return r; -} - -inline -fn __polyvec_ntt(stack u16[KYBER_VECN] r) -> stack u16[KYBER_VECN] -{ - r[0:KYBER_N] = _poly_ntt(r[0:KYBER_N]); - r[KYBER_N:KYBER_N] = _poly_ntt(r[KYBER_N:KYBER_N]); - r[2*KYBER_N:KYBER_N] = _poly_ntt(r[2*KYBER_N:KYBER_N]); - - return r; -} - - -inline -fn __polyvec_pointwise_acc(stack u16[KYBER_VECN] a, stack u16[KYBER_VECN] b) -> stack u16[KYBER_N] -{ - stack u16[KYBER_N] t; - stack u16[KYBER_N] r; - - r = _poly_basemul(r, a[0:KYBER_N], b[0:KYBER_N]); - t = _poly_basemul(t, a[KYBER_N:KYBER_N], b[KYBER_N:KYBER_N]); - r = _poly_add2(r, t); - t = _poly_basemul(t, a[2*KYBER_N:KYBER_N], b[2*KYBER_N:KYBER_N]); - r = _poly_add2(r, t); - - r = __poly_reduce(r); - - return r; -} - -inline -fn __polyvec_reduce(stack u16[KYBER_VECN] r) -> stack u16[KYBER_VECN] -{ - r[0:KYBER_N] = __poly_reduce(r[0:KYBER_N]); - r[KYBER_N:KYBER_N] = __poly_reduce(r[KYBER_N:KYBER_N]); - r[2*KYBER_N:KYBER_N] = __poly_reduce(r[2*KYBER_N:KYBER_N]); - - return r; -} - -inline -fn __polyvec_tobytes(reg u64 rp, stack u16[KYBER_VECN] a) -{ - reg u64 pp; - - pp = rp; - a[0:KYBER_N] = _poly_tobytes(pp, a[0:KYBER_N]); - pp += KYBER_POLYBYTES; - a[KYBER_N:KYBER_N] = _poly_tobytes(pp, a[KYBER_N:KYBER_N]); - pp += KYBER_POLYBYTES; - a[2*KYBER_N:KYBER_N] = _poly_tobytes(pp, a[2*KYBER_N:KYBER_N]); -} diff --git a/code/jasmin/ref/reduce.c b/code/jasmin/ref/reduce.c deleted file mode 100644 index 39264b09..00000000 --- a/code/jasmin/ref/reduce.c +++ /dev/null @@ -1,62 +0,0 @@ -#include -#include "params.h" -#include "reduce.h" - -/************************************************* -* Name: montgomery_reduce -* -* Description: Montgomery reduction; given a 32-bit integer a, computes -* 16-bit integer congruent to a * R^-1 mod q, -* where R=2^16 -* -* Arguments: - int32_t a: input integer to be reduced; has to be in {-q2^15,...,q2^15-1} -* -* Returns: integer in {-q+1,...,q-1} congruent to a * R^-1 modulo q. -**************************************************/ -int16_t montgomery_reduce(int32_t a) -{ - int32_t t; - int16_t u; - -// printf("a: %d\n", a); - u = a * QINV; - t = (int32_t)u * KYBER_Q; - t = a - t; - t >>= 16; - return t; -} - -/************************************************* -* Name: barrett_reduce -* -* Description: Barrett reduction; given a 16-bit integer a, computes -* 16-bit integer congruent to a mod q in {0,...,q} -* -* Arguments: - int16_t a: input integer to be reduced -* -* Returns: integer in {0,...,q} congruent to a modulo q. -**************************************************/ -int16_t barrett_reduce(int16_t a) { - int32_t t; - const int32_t v = (1U << 26)/KYBER_Q + 1; - - t = v*a; - t >>= 26; - t *= KYBER_Q; - return a - t; -} - -/************************************************* -* Name: csubq -* -* Description: Conditionallly subtract q -* -* Arguments: - int16_t x: input integer -* -* Returns: a - q if a >= q, else a -**************************************************/ -int16_t csubq(int16_t a) { - a -= KYBER_Q; - a += (a >> 15) & KYBER_Q; - return a; -} diff --git a/code/jasmin/ref/reduce.h b/code/jasmin/ref/reduce.h deleted file mode 100644 index 59ee6ef4..00000000 --- a/code/jasmin/ref/reduce.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef REDUCE_H -#define REDUCE_H - -#include - -#define MONT 2285 // 2^16 % Q -#define QINV 62209 // q^(-1) mod 2^16 - -int16_t montgomery_reduce(int32_t a); - -int16_t barrett_reduce(int16_t a); - -int16_t csubq(int16_t x); - -#endif diff --git a/code/jasmin/ref/reduce.jinc b/code/jasmin/ref/reduce.jinc deleted file mode 100644 index b9d53bec..00000000 --- a/code/jasmin/ref/reduce.jinc +++ /dev/null @@ -1,70 +0,0 @@ -require "params.jinc" - -param int QINV = 62209; /* q^(-1) mod 2^16 */ -param int MONT = 2285; /* 2^16 % Q */ -param int BARR = 20159; /* (1U << 26)/KYBER_Q + 1 */ - -/* -inline -fn __fqmul(reg u16 a, reg u16 b) -> reg u16 -{ - reg u32 ad; - reg u32 bd; - reg u32 c; - reg u32 t; - reg u16 r; - reg u32 u; - - ad = (32s)a; - bd = (32s)b; - - c = ad * bd; - - u = c * QINV; - u <<= 16; - //u = #SAR_32(u, 16); - u >>s= 16; - t = u * KYBER_Q; - t = c - t; - //t = #SAR_32(t, 16); - t >>s= 16; - r = t; - return r; -} -*/ - -inline fn __fqmul(reg u16 a b) -> reg u16 -{ - reg u32 ad bd c t u; - reg u16 r; - - ad = (32s) a; - bd = (32s) b; - c = ad * bd; - - u = c * (QINV << 16); // merge multiplication of u (<<16) - u >>s= 16; - - t = u * -KYBER_Q; // replace sub by add - t += c; - t >>s= 16; - r = t; - - return r; -} - -inline -fn __barrett_reduce(reg u16 a) -> reg u16 -{ - reg u32 t; - reg u16 r; - t = (32s)a; - t = t * BARR; - //t = #SAR_32(t, 26); - t >>s= 26; - t *= KYBER_Q; - r = t; - r = a; - r -= t; - return r; -} diff --git a/code/jasmin/ref/symmetric-fips202.c b/code/jasmin/ref/symmetric-fips202.c deleted file mode 100644 index cf159db3..00000000 --- a/code/jasmin/ref/symmetric-fips202.c +++ /dev/null @@ -1,64 +0,0 @@ -#include -#include "symmetric.h" -#include "fips202.h" - -/************************************************* -* Name: kyber_shake128_absorb -* -* Description: Absorb step of the SHAKE128 specialized for the Kyber context. -* -* Arguments: - uint64_t *s: pointer to (uninitialized) output Keccak state -* - const unsigned char *input: pointer to KYBER_SYMBYTES input to be absorbed into s -* - unsigned char i additional byte of input -* - unsigned char j additional byte of input -**************************************************/ -void kyber_shake128_absorb(keccak_state *s, const unsigned char *input, unsigned char x, unsigned char y) -{ - unsigned char extseed[KYBER_SYMBYTES+2]; - int i; - - for(i=0;is, extseed, KYBER_SYMBYTES+2); -} - -/************************************************* -* Name: kyber_shake128_squeezeblocks -* -* Description: Squeeze step of SHAKE128 XOF. Squeezes full blocks of SHAKE128_RATE bytes each. -* Modifies the state. Can be called multiple times to keep squeezing, -* i.e., is incremental. -* -* Arguments: - unsigned char *output: pointer to output blocks -* - unsigned long long nblocks: number of blocks to be squeezed (written to output) -* - keccak_state *s: pointer to in/output Keccak state -**************************************************/ -void kyber_shake128_squeezeblocks(unsigned char *output, unsigned long long nblocks, keccak_state *s) -{ - shake128_squeezeblocks(output, nblocks, s->s); -} - -/************************************************* -* Name: shake256_prf -* -* Description: Usage of SHAKE256 as a PRF, concatenates secret and public input -* and then generates outlen bytes of SHAKE256 output -* -* Arguments: - unsigned char *output: pointer to output -* - unsigned long long outlen: number of requested output bytes -* - const unsigned char * key: pointer to the key (of length KYBER_SYMBYTES) -* - const unsigned char nonce: single-byte nonce (public PRF input) -**************************************************/ -void shake256_prf(unsigned char *output, unsigned long long outlen, const unsigned char *key, const unsigned char nonce) -{ - unsigned char extkey[KYBER_SYMBYTES+1]; - size_t i; - - for(i=0;i -#include -#include -#include - -#include "../params.h" -#include "../ntt.h" -#include "../indcpa.h" - -#define NRUNS 100 - -static inline uint64_t cpucycles(void) { - uint64_t result; - - asm volatile("rdtsc; shlq $32,%%rdx; orq %%rdx,%%rax" - : "=a" (result) : : "%rdx"); - - return result; -} - -static int cmp_uint64(const void *a, const void *b) { - if(*(uint64_t *)a < *(uint64_t *)b) return -1; - if(*(uint64_t *)a > *(uint64_t *)b) return 1; - return 0; -} - -static uint64_t median(uint64_t *l, size_t llen) { - qsort(l,llen,sizeof(uint64_t),cmp_uint64); - - if(llen%2) return l[llen/2]; - else return (l[llen/2-1]+l[llen/2])/2; -} - -static uint64_t average(uint64_t *t, size_t tlen) { - size_t i; - uint64_t acc=0; - - for(i=0;i -#include "../fips202.h" - -#define MAXINLEN 33 -#define MAXOUTLEN 168 - -int main(void) -{ - unsigned char in[MAXINLEN]; - unsigned char out0[MAXOUTLEN]; - unsigned char out1[MAXOUTLEN]; - uint64_t state0[25]; - uint64_t state1[25]; - int k; - - FILE *urandom = fopen("/dev/urandom", "r"); - fread(in, 1, sizeof(in), urandom); - - shake256(out0, 128, in, 33); - shake256_128_33_jazz(out1, in); - - for(k=0;k<128;k++) - if(out0[k] != out1[k]) printf("error shake256 at %d: %d %d\n", k, out0[k], out1[k]); - - sha3_512(out0, in, 32); - sha3512_32_jazz(out1, in); - - for(k=0;k<64;k++) - if(out0[k] != out1[k]) printf("error sha3512 at %d: %d %d\n", k, out0[k], out1[k]); - - shake128_absorb(state0, in, 34); - shake128_absorb34_jazz(state1, in); - - for(k=0;k<25;k++) - if(state0[k] != state1[k]) printf("error shake128_absorb at %d: %lu %lu\n", k, state0[k], state1[k]); - - shake128_squeezeblocks(out0, 1, state0); - shake128_squeezeblock_jazz(out1, state1); - - for(k=0;k<25;k++) - if(state0[k] != state1[k]) printf("error shake128_squeezeblock (state) at %d: %lu %lu\n", k, state0[k], state1[k]); - - for(k=0;k - -#include "../params.h" -#include "../ntt.h" -#include "../indcpa.h" - -int main(void) -{ - unsigned char sk0[KYBER_INDCPA_SECRETKEYBYTES]; - unsigned char sk1[KYBER_INDCPA_SECRETKEYBYTES]; - unsigned char pk0[KYBER_INDCPA_PUBLICKEYBYTES]; - unsigned char pk1[KYBER_INDCPA_PUBLICKEYBYTES]; - unsigned char ct0[KYBER_INDCPA_BYTES]; - unsigned char ct1[KYBER_INDCPA_BYTES]; - - unsigned char randomness0[KYBER_SYMBYTES]; - unsigned char randomness1[KYBER_SYMBYTES]; - unsigned char message[KYBER_INDCPA_MSGBYTES]; - - /* - unsigned char outmsg0[KYBER_INDCPA_MSGBYTES]; - unsigned char outmsg1[KYBER_INDCPA_MSGBYTES]; - */ - unsigned char outmsg0[KYBER_POLYVECBYTES]; - unsigned char outmsg1[KYBER_POLYVECBYTES]; - - FILE *urandom = fopen("/dev/urandom", "r"); - fread(randomness0, KYBER_SYMBYTES, 1, urandom); - fread(randomness1, KYBER_SYMBYTES, 1, urandom); - fread(message, KYBER_SYMBYTES, 1, urandom); - fclose(urandom); - - /* TEST KEYPAIR */ - indcpa_keypair_jazz(pk1, sk1, randomness0); - indcpa_keypair(pk0, sk0, randomness0); - - for(int i=0;i -#include - -#include "../params.h" -#include "../ntt.h" -#include "../kem.h" - -int main(void) -{ - unsigned char sk0[KYBER_SECRETKEYBYTES]; - unsigned char sk1[KYBER_SECRETKEYBYTES]; - unsigned char pk0[KYBER_PUBLICKEYBYTES]; - unsigned char pk1[KYBER_PUBLICKEYBYTES]; - unsigned char ct0[KYBER_CIPHERTEXTBYTES]; - unsigned char ct1[KYBER_CIPHERTEXTBYTES]; - unsigned char shk0[KYBER_SSBYTES]; - unsigned char shk1[KYBER_SSBYTES]; - - unsigned char randomness0[2*KYBER_SYMBYTES]; - unsigned char randomness1[2*KYBER_SYMBYTES]; - - FILE *urandom = fopen("/dev/urandom", "r"); - fread(randomness0, 2*KYBER_SYMBYTES, 1, urandom); - fread(randomness1, 2*KYBER_SYMBYTES, 1, urandom); - fclose(urandom); - - /* TEST KEYPAIR */ - jade_kem_kyber_kyber768_amd64_ref_keypair_derand(pk1, sk1, randomness0); - crypto_kem_keypair(pk0, sk0, randomness0); - - for(int i=0;i -#include "../poly.h" - -void poly_setrandom(poly *r) -{ - FILE *urandom = fopen("/dev/urandom", "r"); - fread(r->coeffs, sizeof(int16_t), KYBER_N, urandom); - for(int i=0;icoeffs[i] %= KYBER_Q; - fclose(urandom); -} - -int main(void) -{ - poly a, b, r0; - - poly_setrandom(&a); - poly_setrandom(&b); - - poly_add(&r0, &a, &b); - - poly_add2_jazz(&a, &b); - - for(int i=0;i -#include "../poly.h" -#include "../ntt.h" - -void poly_setrandom(poly *r) -{ - FILE *urandom = fopen("/dev/urandom", "r"); - fread(r->coeffs, sizeof(int16_t), KYBER_N, urandom); - for(int i=0;icoeffs[i] %= KYBER_Q; - fclose(urandom); -} - -int main(void) -{ - poly a, b, r0, r1; - - poly_setrandom(&a); - poly_setrandom(&b); - - poly_basemul(&r0, &a, &b); - - poly_basemul_jazz(&r1, &a, &b); - - for(int i=0;i -#include "../poly.h" -#include "../ntt.h" - -void poly_setrandom(poly *r) -{ - FILE *urandom = fopen("/dev/urandom", "r"); - fread(r->coeffs, sizeof(int16_t), KYBER_N, urandom); - fclose(urandom); - poly_reduce(r); -} - -int main(void) -{ - unsigned char out0[128]; - unsigned char out1[128]; - poly a; - - poly_setrandom(&a); - - poly_compress(out0, &a); - poly_compress_jazz(out1, &a); - - for(int i=0;i<128;i++) - { - if(out0[i] != out1[i]) - printf("error compress %d, %d, %d\n", i, out0[i], out1[i]); - } - - return 0; -} diff --git a/code/jasmin/ref/test/test_poly_csubq.c b/code/jasmin/ref/test/test_poly_csubq.c deleted file mode 100644 index 87f28e08..00000000 --- a/code/jasmin/ref/test/test_poly_csubq.c +++ /dev/null @@ -1,32 +0,0 @@ -#include -#include "../poly.h" -#include "../ntt.h" - -void poly_setrandom(poly *r) -{ - FILE *urandom = fopen("/dev/urandom", "r"); - fread(r->coeffs, sizeof(int16_t), KYBER_N, urandom); - fclose(urandom); - poly_reduce(r); -} - -int main(void) -{ - poly r0, r1; - - poly_setrandom(&r0); - - for(int i=0;i -#include "../poly.h" -#include "../ntt.h" - -int main(void) -{ - unsigned char in[KYBER_POLYCOMPRESSEDBYTES]; - poly r0, r1; - - FILE *urandom = fopen("/dev/urandom", "r"); - fread(in, 1, KYBER_POLYCOMPRESSEDBYTES, urandom); - fclose(urandom); - - poly_decompress(&r0, in); - poly_decompress_jazz(&r1, in); - - for(int i=0;i -#include "../poly.h" -#include "../ntt.h" - -int main(void) -{ - unsigned char in[KYBER_POLYBYTES]; - poly r0, r1; - - FILE *urandom = fopen("/dev/urandom", "r"); - fread(in, 1, KYBER_POLYBYTES, urandom); - fclose(urandom); - - poly_frombytes(&r0, in); - poly_frombytes_jazz(&r1, in); - - for(int i=0;i -#include "../poly.h" -#include "../ntt.h" - -void poly_setrandom(poly *r) -{ - FILE *urandom = fopen("/dev/urandom", "r"); - fread(r->coeffs, sizeof(int16_t), KYBER_N, urandom); - for(int i=0;icoeffs[i] %= KYBER_Q; - } - fclose(urandom); -} - -int main(void) -{ - poly r0, r1; - - poly_setrandom(&r0); - - - for(int i=0;i -#include "../poly.h" -#include "../ntt.h" - -int main(void) -{ - unsigned char in[32]; - poly r0, r1; - - FILE *urandom = fopen("/dev/urandom", "r"); - fread(in, 1, 32, urandom); - fclose(urandom); - - poly_frommsg(&r0, in); - poly_frommsg_jazz(&r1, in); - - for(int i=0;i -#include "../poly.h" -#include "../ntt.h" -#include "../params.h" - - -int main(void) -{ - poly r0, r1; - unsigned char seed[KYBER_SYMBYTES]; - - FILE *urandom = fopen("/dev/urandom", "r"); - fread(seed, 1, KYBER_SYMBYTES, urandom); - fclose(urandom); - - for(int i = 0; i < 1; i++) - { - poly_getnoise(&r0, seed, i); - poly_getnoise_jazz(&r1, seed, i); - - for(int j=0;j -#include "../poly.h" -#include "../ntt.h" - -void poly_setrandom(poly *r) -{ - FILE *urandom = fopen("/dev/urandom", "r"); - fread(r->coeffs, sizeof(int16_t), KYBER_N, urandom); - for(int i=0;icoeffs[i] %= 2*KYBER_Q; - } - fclose(urandom); -} - -int main(void) -{ - poly r0, r1; - - poly_setrandom(&r0); - - - for(int i=0;i -#include "../poly.h" -#include "../ntt.h" - -void poly_setrandom(poly *r) -{ - FILE *urandom = fopen("/dev/urandom", "r"); - fread(r->coeffs, sizeof(int16_t), KYBER_N, urandom); - for(int i=0;icoeffs[i] %= 2*KYBER_Q; - } - fclose(urandom); -} - -int main(void) -{ - poly r0, r1; - - poly_setrandom(&r0); - - for(int i=0;i -#include "../poly.h" -#include "../ntt.h" - -void poly_setrandom(poly *r) -{ - FILE *urandom = fopen("/dev/urandom", "r"); - fread(r->coeffs, sizeof(int16_t), KYBER_N, urandom); - fclose(urandom); -} - -int main(void) -{ - poly r0, r1; - - poly_setrandom(&r0); - - for(int i=0;i -#include "../poly.h" - -void poly_setrandom(poly *r) -{ - FILE *urandom = fopen("/dev/urandom", "r"); - fread(r->coeffs, sizeof(int16_t), KYBER_N, urandom); - for(int i=0;icoeffs[i] %= KYBER_Q; - fclose(urandom); -} - -int main(void) -{ - poly a, b, r0, r1; - - poly_setrandom(&a); - poly_setrandom(&b); - - poly_sub(&r0, &a, &b); - - poly_sub_jazz(&r1, &a, &b); - - for(int i=0;i -#include "../poly.h" -#include "../ntt.h" - -void poly_setrandom(poly *r) -{ - FILE *urandom = fopen("/dev/urandom", "r"); - fread(r->coeffs, sizeof(int16_t), KYBER_N, urandom); - fclose(urandom); - poly_reduce(r); -} - -int main(void) -{ - unsigned char out0[KYBER_POLYBYTES]; - unsigned char out1[KYBER_POLYBYTES]; - poly a; - - poly_setrandom(&a); - - poly_tobytes(out0, &a); - poly_tobytes_jazz(out1, &a); - - for(int i=0;i -#include "../poly.h" -#include "../ntt.h" - -void poly_setrandom(poly *r) -{ - FILE *urandom = fopen("/dev/urandom", "r"); - fread(r->coeffs, sizeof(int16_t), KYBER_N, urandom); - fclose(urandom); - poly_reduce(r); -} - -int main(void) -{ - unsigned char out0[KYBER_INDCPA_MSGBYTES]; - unsigned char out1[KYBER_INDCPA_MSGBYTES]; - poly a; - - poly_setrandom(&a); - - poly_tomsg(out0, &a); - poly_tomsg_jazz(out1, &a); - - for(int i=0;i -#include "../polyvec.h" - -void polyvec_setrandom(polyvec *r) -{ - FILE *urandom = fopen("/dev/urandom", "r"); - for(int i=0;ivec[i].coeffs, sizeof(int16_t), KYBER_N, urandom); - for(int i=0;ivec[i].coeffs[j] %= KYBER_Q; - fclose(urandom); -} - -int main(void) -{ - polyvec a, b, r0; - - polyvec_setrandom(&a); - polyvec_setrandom(&b); - - polyvec_add(&r0, &a, &b); - polyvec_add2_jazz(&a, &b); - - for(int i=0;i -#include "../polyvec.h" -#include "../ntt.h" - -void polyvec_setrandom(polyvec *r) -{ - FILE *urandom = fopen("/dev/urandom", "r"); - for(int i=0;ivec[i].coeffs, sizeof(int16_t), KYBER_N, urandom); - - polyvec_reduce(r); - fclose(urandom); -} - -int main(void) -{ - unsigned char out0[KYBER_POLYVECCOMPRESSEDBYTES]; - unsigned char out1[KYBER_POLYVECCOMPRESSEDBYTES]; - polyvec a; - - polyvec_setrandom(&a); - - polyvec_compress(out0, &a); - polyvec_compress_jazz(out1, &a); - - for(int i=0;i -#include "../poly.h" -#include "../polyvec.h" - -void polyvec_setrandom(polyvec *r) -{ - FILE *urandom = fopen("/dev/urandom", "r"); - for(int i=0;ivec[i].coeffs, sizeof(int16_t), KYBER_N, urandom); - fclose(urandom); - polyvec_reduce(r); -} - -int main(void) -{ - polyvec r0, r1; - - polyvec_setrandom(&r0); - - for(int i = 0;i -#include "../polyvec.h" - -int main(void) -{ - unsigned char in[KYBER_POLYVECCOMPRESSEDBYTES]; - polyvec r0, r1; - - FILE *urandom = fopen("/dev/urandom", "r"); - fread(in, 1, KYBER_POLYVECCOMPRESSEDBYTES, urandom); - fclose(urandom); - - polyvec_decompress(&r0, in); - polyvec_decompress_jazz(&r1, in); - - for(int i=0;i -#include "../polyvec.h" - -int main(void) -{ - unsigned char in[KYBER_POLYVECBYTES]; - polyvec r0, r1; - - FILE *urandom = fopen("/dev/urandom", "r"); - fread(in, 1, KYBER_POLYVECBYTES, urandom); - fclose(urandom); - - polyvec_frombytes(&r0, in); - polyvec_frombytes_jazz(&r1, in); - - for(int i=0;i -#include "../ntt.h" -#include "../poly.h" -#include "../polyvec.h" - -void polyvec_setrandom(polyvec *r) -{ - FILE *urandom = fopen("/dev/urandom", "r"); - for(int i=0;ivec[i].coeffs, sizeof(int16_t), KYBER_N, urandom); - for(int i=0;ivec[i].coeffs[j] %= 2*KYBER_Q; - fclose(urandom); -} - -int main(void) -{ - polyvec r0, r1; - - polyvec_setrandom(&r0); - - for(int i = 0;i -#include "../ntt.h" -#include "../poly.h" -#include "../polyvec.h" - -void polyvec_setrandom(polyvec *r) -{ - FILE *urandom = fopen("/dev/urandom", "r"); - for(int i=0;ivec[i].coeffs, sizeof(int16_t), KYBER_N, urandom); - for(int i=0;ivec[i].coeffs[j] %= 2*KYBER_Q; - fclose(urandom); -} - -int main(void) -{ - polyvec r0, r1; - - polyvec_setrandom(&r0); - - for(int i = 0;i -#include "../ntt.h" -#include "../polyvec.h" - -void polyvec_setrandom(polyvec *r) -{ - FILE *urandom = fopen("/dev/urandom", "r"); - for(int i=0;ivec[i].coeffs, sizeof(int16_t), KYBER_N, urandom); - for(int i=0;ivec[i].coeffs[j] %= KYBER_Q; - fclose(urandom); -} - -int main(void) -{ - polyvec a, b; - poly r0, r1; - - polyvec_setrandom(&a); - polyvec_setrandom(&b); - - polyvec_pointwise_acc(&r0, &a, &b); - polyvec_pointwise_acc_jazz(&r1, &a, &b); - - for(int j=0;j -#include "../poly.h" -#include "../polyvec.h" - -void polyvec_setrandom(polyvec *r) -{ - FILE *urandom = fopen("/dev/urandom", "r"); - for(int i=0;ivec[i].coeffs, sizeof(int16_t), KYBER_N, urandom); - fclose(urandom); -} - -int main(void) -{ - polyvec r0, r1; - - polyvec_setrandom(&r0); - - for(int i = 0;i -#include "../polyvec.h" -#include "../ntt.h" - -void polyvec_setrandom(polyvec *r) -{ - FILE *urandom = fopen("/dev/urandom", "r"); - for(int i=0;ivec[i].coeffs, sizeof(int16_t), KYBER_N, urandom); - for(int i=0;ivec[i].coeffs[j] %= KYBER_Q; - fclose(urandom); -} - -int main(void) -{ - unsigned char out0[KYBER_POLYVECBYTES]; - unsigned char out1[KYBER_POLYVECBYTES]; - polyvec a; - - polyvec_setrandom(&a); - - polyvec_tobytes(out0, &a); - polyvec_tobytes_jazz(out1, &a); - - for(int i=0;i reg u64 -{ - reg u64 cnd t64; - reg u8 t1 t2; - inline int i; - - cnd = 0; - - for i=0 to KYBER_CT_LEN - { - t1 = ctpc.[i]; - t2 = (u8)[ctp + i]; - t1 ^= t2; - t64 = (64u)t1; - cnd |= t64; - } - - cnd = -cnd; - cnd >>= 63; - - return cnd; -} - -inline -fn __cmov(reg ptr u8[KYBER_SYMBYTES] dst, reg u64 src cnd) -> reg ptr u8[KYBER_SYMBYTES] -{ - reg u8 t1 t2 bcond; - inline int i; - - cnd = -cnd; - - for i=0 to KYBER_SYMBYTES - { - t1 = dst.[i]; - t2 = (u8)[src + i]; - t2 = t2 ^ t1; - t2 = t2 & cnd; - t1 ^= t2; - dst.[u8 i] = t1; - } - - return dst; -} diff --git a/code/jasmin/ref/zetas.jinc b/code/jasmin/ref/zetas.jinc deleted file mode 100644 index 17d8f1d0..00000000 --- a/code/jasmin/ref/zetas.jinc +++ /dev/null @@ -1,18 +0,0 @@ -u16[128] jzetas = {2285, 2571, 2970, 1812, 1493, 1422, 287, 202, 3158, 622, 1577, 182, 962, 2127, 1855, 1468, - 573, 2004, 264, 383, 2500, 1458, 1727, 3199, 2648, 1017, 732, 608, 1787, 411, 3124, 1758, - 1223, 652, 2777, 1015, 2036, 1491, 3047, 1785, 516, 3321, 3009, 2663, 1711, 2167, 126, 1469, - 2476, 3239, 3058, 830, 107, 1908, 3082, 2378, 2931, 961, 1821, 2604, 448, 2264, 677, 2054, - 2226, 430, 555, 843, 2078, 871, 1550, 105, 422, 587, 177, 3094, 3038, 2869, 1574, 1653, - 3083, 778, 1159, 3182, 2552, 1483, 2727, 1119, 1739, 644, 2457, 349, 418, 329, 3173, 3254, - 817, 1097, 603, 610, 1322, 2044, 1864, 384, 2114, 3193, 1218, 1994, 2455, 220, 2142, 1670, - 2144, 1799, 2051, 794, 1819, 2475, 2459, 478, 3221, 3021, 996, 991, 958, 1869, 1522, 1628}; - - -u16[128] jzetas_inv = {1701, 1807, 1460, 2371, 2338, 2333, 308, 108, 2851, 870, 854, 1510, 2535, 1278, 1530, 1185, - 1659, 1187, 3109, 874, 1335, 2111, 136, 1215, 2945, 1465, 1285, 2007, 2719, 2726, 2232, 2512, - 75, 156, 3000, 2911, 2980, 872, 2685, 1590, 2210, 602, 1846, 777, 147, 2170, 2551, 246, - 1676, 1755, 460, 291, 235, 3152, 2742, 2907, 3224, 1779, 2458, 1251, 2486, 2774, 2899, 1103, - 1275, 2652, 1065, 2881, 725, 1508, 2368, 398, 951, 247, 1421, 3222, 2499, 271, 90, 853, - 1860, 3203, 1162, 1618, 666, 320, 8, 2813, 1544, 282, 1838, 1293, 2314, 552, 2677, 2106, - 1571, 205, 2918, 1542, 2721, 2597, 2312, 681, 130, 1602, 1871, 829, 2946, 3065, 1325, 2756, - 1861, 1474, 1202, 2367, 3147, 1752, 2707, 171, 3127, 3042, 1907, 1836, 1517, 359, 758, 1441}; diff --git a/proof/spec/KyberSecurity.ec.deprecated b/proof/spec/KyberSecurity.ec.deprecated deleted file mode 100644 index 4f72e12c..00000000 --- a/proof/spec/KyberSecurity.ec.deprecated +++ /dev/null @@ -1,1911 +0,0 @@ -require import AllCore IntDiv Distr List DList PKE_ROM. -require import Array32 Array64 Array128 Array168 Array256 Array384. -require import Array768 Array960 Array1024 Array1152. -from Jasmin require import JWord. - -(*******************************************************************) -(* The security definitions for the spec *) -(*******************************************************************) - -clone import PKE_ROM as MLKEMPKE with - type pkey = W8.t Array1152.t * W8.t Array32.t, - type skey = W8.t Array1152.t, - type plaintext = W8.t Array32.t, - type ciphertext = W8.t Array960.t * W8.t Array128.t. -import RO. - - -(************************************************************************** - MLWE_PKE is the theory where we prove an abstract PKE construction - that matches the algebraic basis of MLKEM. This theory clones MLWE - to get the hard problem definitions, and so it gets a bunch of - theories and RO clones. The relevant one is RO_SMP. - - It also independently clones PKE_Ext as PKE_, but it already fixes - the underlying RO to MLWE_.RO_SMP, so all we need to fix is RO_SMP in - the MWLW_.RO_SMP. - - This clone gives us the security proofs for the algebraic version - of MLKEMPKE. To get proofs for MLKEM spec, we just need to explain - how the MLKEM spec functionally maps to this algebraic version - and then derive the properties as corollaries. -***************************************************************************) - - -require import MLWE_PKE. -require import GFq Rq VecMat Sampling Symmetric Serialization InnerPKE MLKEM Correctness EncDecCorrectness MLKEMLib. - -import KMatrix PolyVec PolyMat InnerPKE Zq. - -op pk_encode(pk : polyvec * W8.t Array32.t) : pkey = - (encode12_vec (toipolyvec (nttv pk.`1)) ,pk.`2). -op pk_decode(pk : pkey) = (invnttv (ofipolyvec (sem_decode12_vec (pk.`1))),pk.`2). -op sk_encode(sk : polyvec) : skey = encode12_vec (toipolyvec (nttv sk)). -op sk_decode(sk : skey) = invnttv (ofipolyvec (sem_decode12_vec sk)). -op m_encode(m : plaintext) : poly = decompress_poly 1 (sem_decode1 m). -op m_decode(p : poly) : plaintext = encode1 (compress_poly 1 p). -op c_encode(c : polyvec * poly) : ciphertext = - (encode10_vec_aux (compress_polyvec 10 c.`1), encode4 (compress_poly 4 c.`2)). -op c_decode(c : ciphertext) = - (decompress_polyvec 10 (sem_decode10_vec c.`1), decompress_poly 4 (sem_decode4 c.`2)). -op rnd_err_v = compress_poly_err 4. -op rnd_err_u = mapv (compress_poly_err 10). - -op max_noise = q %/ 4 - 1. - -op pe_R = pe^256. -op pv = pe_R^(kvec). -op pm = pe_R^(kvec^2). - -op under_noise_bound (p : poly) (b : int) = - all (fun cc => `| as_sint cc| <= b) p. - -op cv_bound_max : int = 104. (* this is the compress error bound for d = 4 *) - -op dplaintext : plaintext distr = srand. - -clone import MLWE_PKE as MLWEPKE with - type MLWE_.MLWE_SMP.RO_SMP.in_t = in_t, - type MLWE_.MLWE_SMP.RO_SMP.out_t = out_t, - op MLWE_.MLWE_SMP.RO_SMP.dout = dout, - type MLWE_.MLWE_SMP.RO_SMP.d_in_t = d_in_t, - type MLWE_.MLWE_SMP.RO_SMP.d_out_t = d_out_t, - type MLWE_.Matrix_.R = poly, - type MLWE_.Matrix_.Matrix.matrix <- polymat, - type MLWE_.Matrix_.vector <- polyvec, - type MLWE_.Matrix_.ZR.t <- poly, - pred MLWE_.Matrix_.ZR.unit <- KMatrix.ZR.unit, - op MLWE_.Matrix_.ZR.(+) <- Rq.(&+), - op MLWE_.Matrix_.ZR.([-]) <- Rq.(&-), - op MLWE_.Matrix_.ZR.zeror <- Rq.zero, - op MLWE_.Matrix_.ZR.oner <- Rq.one, - op MLWE_.Matrix_.ZR.( * ) <- Rq.(&*), - op MLWE_.Matrix_.ZR.invr <- Correctness.invr, - op MLWE_.Matrix_.size <- kvec, - op MLWE_.Matrix_.Vector.(+) <- Correctness.KMatrix.Vector.(+), - op MLWE_.Matrix_.Vector.dotp <- dotp, - op MLWE_.Matrix_.Vector.prevector <- prevector, - op MLWE_.Matrix_.Vector.vclamp <- vclamp, - op MLWE_.Matrix_.Vector.tofunv <- tofunv, - op MLWE_.Matrix_.Vector.offunv <- offunv, - op MLWE_.Matrix_.Matrix.prematrix <- prematrix, - op MLWE_.Matrix_.Matrix.mclamp <- mclamp, - op MLWE_.Matrix_.Matrix.tofunm <- tofunm, - op MLWE_.Matrix_.Matrix.offunm <- offunm, - op MLWE_.duni_R <- duni_R, - op MLWE_.dshort_R <- dshort_R, - type MLWE_.seed <- W8.t Array32.t , - op MLWE_.dseed <- srand, - type pkey <- pkey, - type skey <- skey, - op pk_encode <- pk_encode, - op sk_encode <- sk_encode, - op pk_decode <- pk_decode, - op sk_decode <- sk_decode, - type plaintext <- plaintext, - type ciphertext <- ciphertext, - op m_encode <- m_encode, - op m_decode <- m_decode, - op c_encode <- c_encode, - op c_decode <- c_decode, - op rnd_err_v <- rnd_err_v, - op rnd_err_u <- rnd_err_u, - op under_noise_bound <- under_noise_bound, - op max_noise <- max_noise, - op cv_bound_max <- cv_bound_max, - op PKE_ROM.dplaintext = srand - proof MLWE_.dseed_ll by (apply srand_ll) - proof MLWE_.dshort_R_ll by apply dshort_R_ll - proof MLWE_.duni_R_ll by apply duni_R_ll - proof MLWE_.duni_R_fu by apply duni_R_fu - proof MLWE_.Matrix_.ge0_size by smt() - proof MLWE_.Matrix_.ZR.addrA by apply KMatrix.ZR.addrA - proof MLWE_.Matrix_.ZR.addrC by apply KMatrix.ZR.addrC - proof MLWE_.Matrix_.ZR.add0r by apply KMatrix.ZR.add0r - proof MLWE_.Matrix_.ZR.addNr by apply KMatrix.ZR.addNr - proof MLWE_.Matrix_.ZR.oner_neq0 by apply KMatrix.ZR.oner_neq0 - proof MLWE_.Matrix_.ZR.mulrA by apply KMatrix.ZR.mulrA - proof MLWE_.Matrix_.ZR.mulrC by apply KMatrix.ZR.mulrC - proof MLWE_.Matrix_.ZR.mul1r by apply KMatrix.ZR.mul1r - proof MLWE_.Matrix_.ZR.mulrDl by apply KMatrix.ZR.mulrDl - proof MLWE_.Matrix_.ZR.mulVr by apply KMatrix.ZR.mulVr - proof MLWE_.Matrix_.ZR.unitP by apply KMatrix.ZR.unitP - proof MLWE_.Matrix_.ZR.unitout by apply KMatrix.ZR.unitout - proof MLWE_.Matrix_.Vector.tofunv_prevector by apply tofunv_prevector - proof MLWE_.Matrix_.Vector.tofunvK by apply tofunvK - proof MLWE_.Matrix_.Vector.offunvK by apply offunvK - proof MLWE_.Matrix_.Matrix.tofunm_prematrix by apply tofunm_prematrix - proof MLWE_.Matrix_.Matrix.tofunmK by apply tofunmK - proof MLWE_.Matrix_.Matrix.offunmK by apply offunmK - proof MLWE_.duni_R_uni by apply duni_R_uni - proof PKE_ROM.dplaintext_ll by apply srand_ll - proof *. - - -realize pk_encodeK. -rewrite /pk_decode /pk_encode /cancel /= => x. -rewrite sem_decode12_vec_corr -sem_encode12_vecK; last by rewrite toipolyvecK invnttvK /#. -move => i ib; rewrite /toipolyvec !mapiE /= 1:ib. -by smt(rg_asint qE). -qed. - -realize sk_encodeK. -rewrite /sk_decode /sk_encode /cancel /= => x. -rewrite sem_decode12_vec_corr -sem_encode12_vecK; last by rewrite toipolyvecK invnttvK /#. -move => i ib; rewrite /toipolyvec !mapiE /= 1:ib. -by smt(rg_asint qE). -qed. - -realize encode_noise. -move => /> u v. -rewrite /c_decode /c_encode /rnd_err_u /rnd_err_v /z sem_decode10_vec_corr /= -sem_encode10_vecK. -+ move => i ib; rewrite /compress_polyvec !mapiE /= 1:ib /compress /= /#. -rewrite sem_decode4_corr -sem_encode4K /=. - by move => i ib; rewrite /compress_poly !mapiE /= 1:ib /compress /= /#. -split; last by rewrite round_poly_errE. -rewrite /(+) mapvE /=. -apply eq_vectorP => /> i il ih. rewrite !offunvE /=;1: smt(). -rewrite offunvE 1:/# /= /compress_poly_err /=. -apply Array256.tP => k kb. -rewrite /decompress_polyvec /compress_polyvec /= /fromarray256 /= /Rq.(&+) !getvE !setvE /= !offunvE 1:/# /=. -case (i = 2). -+ move => -> /=; - rewrite mapiE // map2E //= initiE //= initiE //= mapiE //= 1:/# initiE 1:/# /=. - rewrite decompress_errE //; 1: smt(qE). - by rewrite mapiE /#. -case (i = 1). -+ move => -> /=. - rewrite !offunvK /vclamp /kvec /= mapiE // map2E //= initiE //= initiE //= mapiE //= 1:/# initiE 1:/# /=. - rewrite decompress_errE //; 1: smt(qE). - by rewrite mapiE /#. -move => *;rewrite ifF 1:/# !offunvK /vclamp /kvec /= ifT 1:/# ifF 1:/# ifT 1:/# ifT 1:/#. - rewrite mapiE // map2E //= initiE //= initiE //= mapiE //= 1:/# initiE 1:/# /=. - rewrite decompress_errE //; 1: smt(qE). - by rewrite mapiE /#. -qed. - -realize good_decode. -rewrite /under_noise_bound /m_encode /m_decode /compress_poly - /decompress_poly /max_noise qE /= => m n. -rewrite allP => /= hgood. -rewrite sem_decode1_corr. -have : decode1 (encode1 (map (compress 1) (map (decompress 1) (decode1 m) &+ n))) = - (decode1 m); last by smt(sem_decode1K). -apply Array256.ext_eq => /> x h0x hx256. -rewrite -sem_encode1K. -+ move => i ib; rewrite !mapiE /= 1:ib /compress /= /#. -rewrite /(&+) mapiE 1:/# map2E /= initiE /= 1:/# mapiE 1:/#. -rewrite -sem_decode1_corr. -have [->|->] /=: (sem_decode1 m).[x]=0 \/ (sem_decode1 m).[x]=1 - by smt(sem_decode1_bnd). - rewrite /decompress /=. - rewrite from_int_round. - rewrite -{1}zeroE asintK Zq.ZModule.add0r compress1_is0 // Bq1E. - smt(). -rewrite /decompress /round /=. -have ->: q%r / 2%r + inv 2%r = (q%r+1%r)/2%r - by field; smt(). -rewrite qE -fromintD -divz_floor //=. -have: compress 1 (incoeff 1665 + n.[x]) <> 0. - rewrite compress1_is0 Bq1E. - move: (hgood x _) => //. - rewrite (_:832=831+1) 1://. - move=> /absZqP [H|]. - rewrite absZqP negb_or; split. - smt(incoeffK). - rewrite qE /=. - smt(incoeffK). - rewrite qE /=. - smt(incoeffK). -by rewrite /compress /=; smt(ltz_pmod modz_ge0). -qed. - -realize cv_bound_valid. -move=> A s e r e2 m ????? t v. -rewrite /under_noise_bound /rnd_err_v /compress_poly_err /cv_bound. -rewrite allP /compress_err => i Hi /=. -rewrite mapiE //= -Bq4E. -by move: (compress_err_bound v.[i] 4 _ _) => //= /#. -qed. - -realize noise_commutes. -move => n n' maxn b H H0. -move : H H0; rewrite /under_noise_bound. -rewrite !allP. -move => Hn Hnp i ib. -move : (Hn i ib). -move : (Hnp i ib) => /=. -rewrite /as_sint /Rq.(&+) /= map2E !initiE //= Zq.addE qE /= !StdOrder.IntOrder.ler_norml /= => Hni Hnpi. -by smt(). -qed. - -realize noise_preserved. -move => n maxn. -rewrite /under_noise_bound. -rewrite !allP. -rewrite eq_iff; split => /=. -move => H i ib; move : (H i ib). -rewrite /(&-) mapiE 1:/#. -rewrite as_sintN /= /#. -move => H i ib; move : (H i ib). -rewrite /(&-) mapiE 1:/#. -rewrite as_sintN /= /#. -qed. - -(* We now specify the various components used by MLKEM spec so that - we can relate it to the abstract construction. The differences - are only in the sampling procedures. *) -import MLWE_.MLWE_SMP. -import RO. - -module type XOF_RO_t(O : SMP_RO) = { - include XOF_t -}. - -module (KSampler(XOF : XOF_RO_t) : Sampler) (O : SMP_RO) = { - proc sampleA(sd : W8.t Array32.t) : polymat = { - var i,j,c; - var a : polymat; - a <- witness; - i <- 0; - while (i < kvec) { - j <- 0; - while (j < kvec) { - XOF(O).init(sd,j,i); - c <@ Parse(XOF(O)).sample(); - a.[(i,j)] <- c; - j <- j + 1; - } - i <- i + 1; - } - return a; - } - - proc sampleAT(sd : W8.t Array32.t) : polymat = { - var i,j,c; - var a : polymat; - a <- witness; - i <- 0; - while (i < kvec) { - j <- 0; - while (j < kvec) { - XOF(O).init(sd,i,j); - c <@ Parse(XOF(O)).sample(); - a.[(i,j)] <- c; - j <- j + 1; - } - i <- i + 1; - } - return a; - } - -}. - -lemma KSamplerA_ll (XOF <: XOF_RO_t) (O <: SMP_RO) : - (islossless O.get) => - (forall (O0 <: SMP_RO), islossless O0.get => islossless XOF(O0).init) => - (forall (O0 <: SMP_RO), islossless O0.get => islossless XOF(O0).next_bytes) => - (forall (O0 <: SMP_RO) (XOF0 <: XOF_RO_t), - islossless O0.get => - islossless XOF0(O0).next_bytes => - islossless Parse(XOF0(O0)).sample) => - islossless KSampler(XOF,O).sampleA. -proof. -move => O_ll XOF_init_ll XOF_next_ll Parse_ll. -proc. -while(0 <= i <= kvec) (kvec-i); last by move => *;auto => /> /#. -move => *; wp. -while(#post /\ 0 <= j <= kvec) (kvec-j); last by move => *;auto => /> /#. -move => *; wp. -call (Parse_ll O XOF). -+ apply (XOF_next_ll O). -apply (O_ll). -conseq (_: ==> true); 1: by smt(). -call (XOF_init_ll O). -by auto => />. -qed. - -lemma KSamplerAT_ll (XOF <: XOF_RO_t) (O <: SMP_RO) : - (islossless O.get) => - (forall (O0 <: SMP_RO), islossless O0.get => islossless XOF(O0).init) => - (forall (O0 <: SMP_RO), islossless O0.get => islossless XOF(O0).next_bytes) => - (forall (O0 <: SMP_RO) (XOF0 <: XOF_RO_t), - islossless O0.get => - islossless XOF0(O0).next_bytes => - islossless Parse(XOF0(O0)).sample) => - islossless KSampler(XOF,O).sampleAT. -proof. -move => O_ll XOF_init_ll XOF_next_ll Parse_ll. -proc. -while(0 <= i <= kvec) (kvec-i); last by move => *;auto => /> /#. -move => *; wp. -while(#post /\ 0 <= j <= kvec) (kvec-j); last by move => *;auto => /> /#. -move => *; wp. -call (Parse_ll O XOF). -+ apply (XOF_next_ll O). -apply (O_ll). -conseq (_: ==> true); 1: by smt(). -call (XOF_init_ll O). -by auto => />. -qed. - - -(************************************* -We define a version of MLKEM spec that -uses external samplers. This is useful for -syntactic reasons in the following proof -steps. -*************************************) - -(* G needs only to be entropy smoothing, which is - exactly a PRF without any input *) - -clone PRF as HS_DEFS with - type D <- unit, - type R <- W8.t Array32.t * W8.t Array32.t. - -clone import HS_DEFS.PseudoRF as HSF with - type K <- W8.t Array32.t, - op dK <- srand, - op F <- fun i _ => G_coins i. - -module KHS = HSF.PseudoRF. - -module G(HS: HSF.PseudoRF) = { - proc sample(s : W8.t Array32.t) : W8.t Array32.t * W8.t Array32.t = { - var rhosig; - rhosig <@ HS.f(s,()); - return rhosig; - } -}. - - -module type NoiseSampler(PRF1 : PRF_.PseudoRF, PRF2 : PRF_.PseudoRF) = { - proc sample2(noiseseed:W8.t Array32.t) : polyvec * polyvec - proc sample3(noiseseed:W8.t Array32.t) : polyvec * polyvec * poly -}. - -import PolyVec PolyMat. - -module (KNS : NoiseSampler) (PRF1 : PRF_.PseudoRF, PRF2 : PRF_.PseudoRF) = { - proc sample2(noiseseed:W8.t Array32.t) : polyvec * polyvec = { - var noise1 : polyvec; - var noise2 : polyvec; - var _N,i,c; - noise1 <- witness; - noise2 <- witness; - _N <- 0; - i <- 0; - while (i < kvec) { - c <@ CBD2_PRF(PRF1).sample(noiseseed,_N); - noise1 <- noise1.[i<-c]; - _N <- _N + 1; - i <- i + 1; - } - i <- 0; - while (i < kvec) { - c <@ CBD2_PRF(PRF1).sample(noiseseed,_N); - noise2 <- noise2.[i<-c]; - _N <- _N + 1; - i <- i + 1; - } - - return (noise1,noise2); - } - - proc sample3(noiseseed:W8.t Array32.t) : polyvec * polyvec * poly = { - var noise1 : polyvec; - var noise2 : polyvec; - var e2,_N,i,c; - noise1 <- witness; - noise2 <- witness; - _N <- 0; - i <- 0; - while (i < kvec) { - c <@ CBD2_PRF(PRF2).sample(noiseseed,_N); - noise1 <- noise1.[i<-c]; - _N <- _N + 1; - i <- i + 1; - } - i <- 0; - while (i < kvec) { - c <@ CBD2_PRF(PRF2).sample(noiseseed,_N); - noise2 <- noise2.[i<-c]; - _N <- _N + 1; - i <- i + 1; - } - e2 <@ CBD2_PRF(PRF2).sample(noiseseed,_N); - return (noise1,noise2, e2); - } -}. - -module MLKEMS(HS : HSF.PseudoRF, S : Sampler, NS : NoiseSampler, PRF1 : PRF_.PseudoRF, PRF2 : PRF_.PseudoRF, O : SMP_RO) (* : Scheme *) = { - - (* Spec gives a derandomized enc that matches this code *) - proc kg_derand(rho sig: W8.t Array32.t) : pkey * skey = { - var t; - var tv,sv : W8.t Array1152.t; - var a : polymat; - var s,e : polyvec; - e <- witness; - s <- witness; - sv <- witness; - tv <- witness; - a <@ S(O).sampleA(rho); - (s,e) <@ NS(PRF1,PRF2).sample2(sig); - s <- nttv s; - e <- nttv e; - t <- (ntt_mmul a s + e)%PolyVec; - tv <@ EncDec.encode12_vec(toipolyvec t); (* minimum residues *) - sv <@ EncDec.encode12_vec(toipolyvec s); (* minimum residues *) - return ((tv,rho),sv); - } - - proc kg() : pkey * skey = { - var s,rho,sig,kp; - s <@ HS.keygen(); - (rho,sig) <@ G(HS).sample(s); - kp <@ kg_derand(rho,sig); - return kp; - } - - (* Spec gives a derandomized enc that matches this code *) - proc enc_derand(pk : pkey, m : plaintext, r : W8.t Array32.t) : ciphertext = { - var tv,rho,rv,e1,e2,rhat,u,v,mp,c2,thati; - var that : polyvec; - var aT : polymat; - var c1 : W8.t Array960.t; - aT <- witness; - c1 <- witness; - e1 <- witness; - rv <- witness; - that <- witness; - (tv,rho) <- pk; - thati <@ EncDec.decode12_vec(tv); - that <- ofipolyvec thati; - aT <@ S(O).sampleAT(rho); - (rv,e1,e2) <@ NS(PRF1,PRF2).sample3(r); - rhat <- nttv rv; - u <- (invnttv (ntt_mmul aT rhat) + e1)%PolyVec; - mp <@ EncDec.decode1(m); - v <- invntt (ntt_dotp that rhat) &+ e2 &+ decompress_poly 1 mp; - c1 <@ EncDec.encode10_vec(compress_polyvec 10 u); - c2 <@ EncDec.encode4(compress_poly 4 v); - return (c1,c2); - } - - proc enc(pk : pkey, m : plaintext) : ciphertext = { - var r,c; - r <@ PRF2.keygen(); - c <@ enc_derand(pk,m,r); - return c; - } - - proc dec(sk : skey, cph : ciphertext) : plaintext option = { - var m,mp,ui,v,vi,si, c1, c2; - var u,s : polyvec; - u <- witness; - s <- witness; - (c1,c2) <- cph; - ui <@ EncDec.decode10_vec(c1); - u <- decompress_polyvec 10 ui; - vi <@ EncDec.decode4(c2); - v <- decompress_poly 4 vi; - si <@ EncDec.decode12_vec(sk); - s <- ofipolyvec si; - mp <- v &+ ((&-) (invntt (ntt_dotp s (nttv u)))); - m <@ EncDec.encode1(compress_poly 1 mp); - return Some m; - } - -}. - -module InnerPKE_rnd = { - include InnerPKE - - proc kg() : pkey * skey = { - var s,kp; - s <$ srand; - kp <@ InnerPKE.kg_derand(s); - return kp; - } - - proc enc(pk : pkey, m : plaintext) : ciphertext = { - var r,c; - r <$ srand; - c <@ enc_derand(pk,m,r); - return c; - } - - -}. - -module XOF_Dummy(O : SMP_RO) = { - include XOF -}. - -lemma getv_setvE x i j (v : polyvec) : - (v.[i <- x].[j])%Vector = if (0 <= j < kvec) then if (i = j) then x else (v.[j])%Vector else Rq.zero - by smt(setvE getv_out offunvE). - -(* We have that instantiating S with KSampler(XOF_Dummy) we get the Spec *) -lemma kg_sampler_kg (O <: SMP_RO {-XOF}) : - equiv [ MLKEMS(KHS,KSampler(XOF_Dummy),KNS,KPRF,KPRF,O).kg ~ InnerPKE_rnd.kg : - ={arg} /\ ={glob O, glob XOF} ==> ={res, glob O, glob XOF}]. -proc. -inline {1} 3; inline {2} 2. sim. -inline {1} 10. wp;conseq />. -while (noise1{1} = s0{2} /\ rho0{1} = rho{2} /\ noiseseed{1} = sig{2} /\ - ={i,_N,a,XOF.state} /\ - 0<=i{1}<=kvec /\ - forall k, 0 <=k < i{1} => (noise2{1}.[k])%Vector = (e{2}.[k])%Vector). -+ wp; conseq(_: ={c}); 1: by smt(getv_setvE). - by inline*; sim; auto => />. -wp;conseq (: noise1{1} = s0{2} /\ - rho0{1} = rho{2} /\ - noiseseed{1} = sig{2} /\ - ={_N, a, XOF.state}); first by smt(eq_vectorP). -while (rho0{1} = rho{2} /\ noiseseed{1} = sig{2} /\ - ={i,_N,a,XOF.state} /\ - 0<=i{1}<=kvec /\ - forall k, 0 <=k < i{1} => (noise1{1}.[k])%Vector = (s0{2}.[k])%Vector). -+ wp; conseq(_: ={c}); 1: by smt(getv_setvE). - by inline*; sim; auto => />. -swap {2} 9 2. swap {1} [10..12] -1. -wp;conseq (: rho0{1} = rho{2} /\ - noiseseed{1} = sig{2} /\ - ={a, XOF.state}); 1: by smt(eq_vectorP). -by inline *;sim;auto => />. -qed. - -lemma enc_sampler_enc (O <: SMP_RO {-XOF}) : - equiv [ - MLKEMS(KHS,KSampler(XOF_Dummy),KNS,KPRF,KPRF,O).enc ~ InnerPKE_rnd.enc : - ={arg} /\ ={glob O, glob XOF} ==> ={res, glob O, glob XOF}]. -proc. -inline {1} 2; inline {2} 2. sim. -inline {1} 14. -wp;conseq />. -inline {1} 22. inline {1} 25. inline {2} 20. swap {2} 21 -1. -sim;wp;conseq (: ={that,_N,m0,aT,XOF.state} /\ - noise2{1} = e1{2} /\ - noise1{1} = rv{2} /\ - noiseseed{1} = coins{2}); 1: smt(). -while (={i, that, _N, m0, aT, XOF.state} /\ noise1{1} = rv{2} /\ noiseseed{1} = coins{2} /\ - 0<=i{1}<=kvec /\ - forall k, 0 <=k < i{1} => (noise2{1}.[k])%Vector = (e1{2}.[k])%Vector). -+ wp; conseq(_: ={c0}); 1: by smt(getv_setvE). - by inline*; sim; auto => />. -wp;conseq (: ={that,_N,m0,aT,XOF.state} /\ - noise1{1} = rv{2} /\ - noiseseed{1} = coins{2}); first by smt(eq_vectorP). -while (={i, that, _N, m0, aT, XOF.state} /\ noiseseed{1} = coins{2} /\ - 0<=i{1}<=kvec /\ - forall k, 0 <=k < i{1} => (noise1{1}.[k])%Vector = (rv{2}.[k])%Vector). -+ wp; conseq(_: ={c0}); 1: by smt(getv_setvE). - by inline*; sim; auto => />. -swap {2} 11 4. swap {1} [14..16] -1. -wp;conseq (: ={that, m0, aT, XOF.state} /\ noiseseed{1} = coins{2}); 1: by smt(eq_vectorP). -inline {1} 16. -wp;conseq />;sim. -wp;conseq />. -call(_: true); 1: by sim. -by inline *;auto => />. -qed. - -lemma enc_sampler_dec (O <: SMP_RO) : - equiv [ MLKEMS(KHS,KSampler(XOF_Dummy),KNS,KPRF,KPRF,O).dec ~ InnerPKE_rnd.dec : - ={arg} /\ ={glob O} ==> res{1} = Some res{2} ] by proc;inline *;sim => /#. - - -(*******************************************************************) -(* Entropy Smoothing and PRF hops for Security *) -(*******************************************************************) - -abbrev dsmooth = darray32 W8.dword `*` darray32 W8.dword. - -lemma dsmooth_ll: is_lossless dsmooth - by apply dprod_ll;split;apply darray32_ll;apply W8.dword_ll. - -clone import HS_DEFS.RF as IdealHSF with - op dR = fun (_: unit) => dsmooth - proof dR_ll by smt(dsmooth_ll). - -abbrev dnbytes = darray128 W8.dword. - -lemma dnbytes_ll: is_lossless dnbytes. -proof. -apply darray128_ll. -by apply W8.dword_ll. -qed. - -clone import PRF_DEFS.RF as IdealPRF1 with - op dR = fun (_: W8.t) => dnbytes - proof dR_ll by smt(dnbytes_ll). - -clone import PRF_DEFS.RF as IdealPRF2 with - op dR = fun (_: W8.t) => dnbytes - proof dR_ll by smt(dnbytes_ll). - -module DummyHS_D(F : HS_DEFS.PRF_Oracles) = { - proc keygen() : W8.t Array32.t = { return witness;} - proc f(inp : W8.t Array32.t * unit) : W8.t Array32.t * W8.t Array32.t = { var out; out <@ F.f(); return out; } -}. - -module DummyHS(F : HS_DEFS.PRF_Oracles) = { - proc keygen() : W8.t Array32.t = { IdealHSF.RF.m <- SmtMap.empty; return witness;} - proc f(inp : W8.t Array32.t * unit) : W8.t Array32.t * W8.t Array32.t = { var out; out <@ F.f(); return out; } -}. - -module (D_ES(S : Sampler, O : RO, As : MLKEMPKE.Adversary) : HS_DEFS.Distinguisher) (F : HS_DEFS.PRF_Oracles) = { - - proc distinguish() : bool = { - var b; - b <@ MLKEMPKE.CPA(O,MLKEMS(DummyHS_D(F),S,KNS,KPRF,KPRF),As).main(); - return b; - } -}. - -module MLKEMSIdeal(HS : HSF.PseudoRF, S : Sampler, NS : NoiseSampler, PRF1 : PRF_.PseudoRF, PRF2 : PRF_.PseudoRF, O : SMP_RO) (* : Scheme *) = { - - include MLKEMS(HS,S,NS,PRF1,PRF2,O) [-kg] - - proc kg() : pkey * skey = { - var s,rho,sig,kp; - s <@ HS.keygen(); - rho <$ srand; - sig <@ PRF1.keygen(); - kp <@ kg_derand(rho,sig); - return kp; - } - -}. - -module DummyPRF1D(F : PRF_DEFS.PRF_Oracles) = { - proc keygen() : W8.t Array32.t = { return witness;} - proc f(inp : W8.t Array32.t * W8.t) : W8.t Array128.t = { var out; out <@ F.f(inp.`2); return out; } -}. - -module DummyPRF1(F : PRF_DEFS.PRF_Oracles) = { - proc keygen() : W8.t Array32.t = { IdealPRF1.RF.m <- SmtMap.empty; return witness;} - proc f(inp : W8.t Array32.t * W8.t) : W8.t Array128.t = { var out; out <@ F.f(inp.`2); return out; } -}. - -module (D_PRF1(S : Sampler, O : RO, As : MLKEMPKE.Adversary) : PRF_DEFS.Distinguisher) (F : PRF_DEFS.PRF_Oracles) = { - - proc distinguish() : bool = { - var b; - b <@ MLKEMPKE.CPA(O,MLKEMSIdeal(DummyHS(IdealHSF.RF),S,KNS,DummyPRF1(F),KPRF),As).main(); - return b; - } -}. - -module DummyPRF2D(F : PRF_DEFS.PRF_Oracles) = { - proc keygen() : W8.t Array32.t = { return witness;} - proc f(inp : W8.t Array32.t * W8.t) : W8.t Array128.t = { var out; out <@ F.f(inp.`2); return out; } -}. - -module DummyPRF2(F : PRF_DEFS.PRF_Oracles) = { - proc keygen() : W8.t Array32.t = { IdealPRF2.RF.m <- SmtMap.empty; return witness;} - proc f(inp : W8.t Array32.t * W8.t) : W8.t Array128.t = { var out; out <@ F.f(inp.`2); return out; } -}. - -module (D_PRF2(S : Sampler, O : RO, As : MLKEMPKE.Adversary) : PRF_DEFS.Distinguisher) (F : PRF_DEFS.PRF_Oracles) = { - - proc distinguish() : bool = { - var b; - b <@ MLKEMPKE.CPA(O,MLKEMSIdeal(DummyHS(IdealHSF.RF),S,KNS,DummyPRF1(IdealPRF1.RF),DummyPRF2D(F)),As).main(); - return b; - } -}. - -section. - -declare module O <: RO {-IdealPRF2.RF, -IdealPRF1.RF, -PRF_.PRF, -IdealHSF.RF, -KPRF, -B1ROM, -B2ROM, -HSF.PRF}. -declare module S <: Sampler {-IdealPRF2.RF, -IdealPRF1.RF, -PRF_.PRF, -IdealHSF.RF, -KPRF, -O, -B1ROM, -B2ROM}. -declare module As <: MLKEMPKE.Adversary {-IdealPRF2.RF, -IdealPRF1.RF, -PRF_.PRF, -IdealHSF.RF, -HSF.PRF, -O, -S, -KPRF, -B1ROM, -B2ROM}. - -lemma ESHop &m : - Pr [ MLKEMPKE.CPA(O,MLKEMS(KHS,S,KNS,KPRF,KPRF),As).main() @ &m : res] - - Pr [ MLKEMPKE.CPA(O,MLKEMS(DummyHS(IdealHSF.RF),S,KNS,KPRF,KPRF),As).main() @ &m : res] = - Pr [ HS_DEFS.IND(HSF.PRF,D_ES(S,O,As)).main() @ &m : res ] - - Pr [ HS_DEFS.IND(IdealHSF.RF,D_ES(S,O,As)).main() @ &m : res ]. -proof. -have -> : Pr[CPA(O,MLKEMS(KHS, S, KNS, KPRF,KPRF), As).main() @ &m : res] = - Pr[HS_DEFS.IND(HSF.PRF, D_ES(S, O, As)).main() @ &m : res] . -+ byequiv => //. - proc. - inline {1} 2; inline {2} 2. inline {2} 2. inline {2} 3. - wp; call(_: ={glob O}); 1: by sim. - wp; call(_: ={glob O, glob S}); 1: by sim. - rnd. - wp; call(_: ={glob O}); 1: by sim. - conseq (_: _ ==> ={pk, glob O,glob S}); 1: smt(). - swap {2} 2 -1. - seq 1 1 : (={glob As, glob S, glob O}); 1: by sim. - inline {1} 1. inline {2} 2. inline {2} 1. - seq 3 3 : (#pre /\ ={rho,sig}); 1: by inline *; sim. - by sim. - -+ have -> : - Pr[CPA(O,MLKEMS(DummyHS(IdealHSF.RF), S, KNS, KPRF, KPRF), As).main() @ &m : res] = - Pr[HS_DEFS.IND(IdealHSF.RF, D_ES(S, O, As)).main() @ &m : res]. -+ byequiv => //. - proc. - inline {1} 2; inline {2} 2. inline {2} 2. inline {2} 3. - wp; call(_: ={glob O}); 1: by sim. - wp; call(_: ={glob O, glob S}); 1: by sim. - rnd. - wp; call(_: ={glob O}); 1: by sim. - conseq (_: _ ==> ={pk, glob O,glob S}); 1: smt(). - swap {2} 2 -1. - seq 1 1 : (={glob As, glob S, glob O}); 1: by sim. - inline {1} 1. inline {2} 2. inline {2} 1. - seq 3 3 : (#pre /\ ={rho,sig}). - + inline *; sim. - by sim. -done. -qed. - - -module ArraySample = { - proc sL() = { - var r,rho,k; - r <$ IdealHSF.dR (); - rho <- Array32.init (fun (i : int) => r.`1.[i]); - k <- Array32.init (fun (i : int) => r.`2.[i]); - return (rho,k); - } - - proc sR() = { - var rho,k; - rho <$ srand; - k <$ srand; - return (rho,k); - } -}. - - -require import DProd. -clone ProdSampling with - type t1 <- W8.t Array32.t, - type t2 <- W8.t Array32.t. - -lemma srand_fu : is_full srand. -rewrite /darray32 /is_full => x. -rewrite supp_dmap. -exists (to_list x). -rewrite to_listK /=. -rewrite supp_dlist 1:/# size_to_list /= allP => *. -by rewrite dword_fu. -qed. - -lemma arrsample : - equiv [ ArraySample.sL ~ ArraySample.sR : true ==> ={res} ]. -proc;wp;rndsem {2} 0;auto => />. -have <- : dlet srand (fun (rho : W8.t Array32.t) => dmap srand (fun (k : W8.t Array32.t) => (rho, k))) = (IdealHSF.dR ()). -+ by rewrite /dR /dmap dprod_dlet;congr;apply fun_ext => *;congr;rewrite /(\o) /=. -move => ?rl?;split => *;last by split;rewrite tP => *;rewrite initiE /#. -rewrite supp_dlet /=;exists rl.`1;split => *;1: apply srand_fu. -rewrite supp_dmap /=;exists rl.`2;split => *; last by smt(). -by apply srand_fu. -qed. - -lemma MLKEMS_MLKEMIdeal &m : - Pr [ MLKEMPKE.CPA(O,MLKEMS(DummyHS(IdealHSF.RF),S,KNS,KPRF,KPRF),As).main() @ &m : res] = - Pr [ MLKEMPKE.CPA(O,MLKEMSIdeal(DummyHS(IdealHSF.RF),S,KNS,KPRF,KPRF),As).main() @ &m : res]. -proof. -byequiv => //. -proc. - inline {1} 2; inline {2} 2. - wp; call(_: ={glob O}); 1: by sim. - wp; call(_: ={glob O, glob S}); 1: by sim. - rnd. - wp; call(_: ={glob O}); 1: by sim. - conseq (_: _ ==> ={pk, glob O,glob S}); 1: smt(). - seq 1 1 : (={glob As, glob S, glob O}); 1: by sim. - inline {1} 1. inline {2} 1. - wp; call(_: ={glob O, glob S}); 1: by sim. - conseq => />. - inline *. - rcondt{1} 6; 1: by move => *; auto => />;smt(@SmtMap). - seq {1} 5 2 : (true); 1: by auto. - wp. - conseq (_: _ ==> rho{2} = Array32.init (fun i => r{1}.`1.[i]) /\ - k{2} = Array32.init (fun i => r{1}.`2.[i])); first - by move=> /> &1 *; rewrite SmtMap.get_set_sameE /=;split;rewrite tP => *;rewrite initiE /#. - - transitivity {1} { r <@ ArraySample.sL(); } - (true ==> ={r}) (true ==> rho{2} = Array32.init (fun i => r{1}.`1.[i]) /\ k{2} = Array32.init (fun i => r{1}.`2.[i])) => //; first - by inline*; auto => /> &1 ? [rl1 rl2] /= => *;split;rewrite tP => *;rewrite initiE /#. - - transitivity {2} { (rho, k) <@ ArraySample.sR(); } - (true ==> rho{2} = Array32.init (fun i => r{1}.`1.[i]) /\ k{2} = Array32.init (fun i => r{1}.`2.[i])) (true ==> ={rho,k})=> //. - call arrsample; 1: by auto => /> *;split;rewrite tP => *;rewrite initiE /#. - by inline*; auto => /> &1 ? [rl1 rl2] /= => *;split;rewrite tP => *;rewrite initiE /#. -qed. - - -lemma PRFHop1 &m : - Pr [ MLKEMPKE.CPA(O,MLKEMSIdeal(DummyHS(IdealHSF.RF),S,KNS,KPRF,KPRF),As).main() @ &m : res] - - Pr [ MLKEMPKE.CPA(O,MLKEMSIdeal(DummyHS(IdealHSF.RF),S,KNS,DummyPRF1(IdealPRF1.RF),KPRF),As).main() @ &m : res] = - Pr [ PRF_DEFS.IND(PRF_.PRF,D_PRF1(S,O,As)).main() @ &m : res ] - - Pr [ PRF_DEFS.IND(IdealPRF1.RF,D_PRF1(S,O,As)).main() @ &m : res ]. -proof. -have -> : Pr[CPA(O, MLKEMSIdeal(DummyHS(IdealHSF.RF), S, KNS, KPRF, KPRF), As).main() @ &m : res] = - Pr[PRF_DEFS.IND(PRF_.PRF, D_PRF1(S, O, As)).main() @ &m : res]. -+ byequiv => //. - proc. - inline {1} 2; inline {2} 2. inline {2} 2. inline {2} 3. - wp; call(_: ={glob O}); 1: by sim. - wp; call(_: ={glob O, glob S}); 1: by sim. - rnd. - wp; call(_: ={glob O}); 1: by sim. - conseq (_: _ ==> ={pk, glob O,glob S}); 1: smt(). - swap {2} 2 -1. - seq 1 1 : (={glob As, glob S, glob O}); 1: by sim. - inline {1} 1. inline {2} 2. - swap {2} [2..4] -1. - seq 3 3 : (#pre /\ ={glob IdealHSF.RF, rho,s}). - + by inline *; sim. - seq 1 2 : (#pre /\ sig{1} = PRF_.PRF.k{2});1: by inline *; auto => />. - inline {1} 1; inline {2} 1. - sim 8 8. - seq 7 7 : (#pre /\ ={a,rho0} /\ sig0{1} = PRF_.PRF.k{2}); 1: by sim. - conseq => />. - inline {1} 1; inline {2} 1. - inline *. - unroll for {1} 8. - unroll for {2} 8. - do 3!( wp; conseq => />; sim). - wp;auto => />;sim. - unroll for {1} 6. - unroll for {2} 6. - do 3!( wp; conseq => />; sim). - by wp;auto => />;sim. - -have -> : Pr[CPA(O, MLKEMSIdeal(DummyHS(IdealHSF.RF), S, KNS, DummyPRF1(IdealPRF1.RF), KPRF), As).main() @ &m : res] = - Pr[PRF_DEFS.IND(IdealPRF1.RF, D_PRF1(S, O, As)).main() @ &m : res]. -+ byequiv => //. - proc. - inline {1} 2; inline {2} 2. inline {2} 2. inline {2} 3. - wp; call(_: ={glob O}); 1: by sim. - wp; call(_: ={glob O, glob S}); 1: by sim. - rnd. - wp; call(_: ={glob O}); 1: by sim. - conseq (_: _ ==> ={pk, glob O,glob S}); 1: smt(). - swap {2} 2 -1. - seq 1 1 : (={glob As, glob S, glob O}); 1: by sim. - inline {1} 1. inline {2} 2. - swap {2} [2..4] -1. - seq 3 3 : (#pre /\ ={glob IdealHSF.RF, rho,s}). - + by inline *; sim. - seq 1 2 : (#pre /\ ={glob IdealPRF1.RF,sig});1: by inline *; auto => />. - by inline *;sim. -done. -qed. - -lemma PRFHop2 &m : - Pr [ MLKEMPKE.CPA(O,MLKEMSIdeal(DummyHS(IdealHSF.RF),S,KNS,DummyPRF1(IdealPRF1.RF),KPRF),As).main() @ &m : res] - - Pr [ MLKEMPKE.CPA(O,MLKEMSIdeal(DummyHS(IdealHSF.RF),S,KNS,DummyPRF1(IdealPRF1.RF),DummyPRF2(IdealPRF2.RF)),As).main() @ &m : res] = - Pr [ PRF_DEFS.IND(PRF_.PRF,D_PRF2(S,O,As)).main() @ &m : res ] - - Pr [ PRF_DEFS.IND(IdealPRF2.RF,D_PRF2(S,O,As)).main() @ &m : res ]. -proof. -have -> : Pr[CPA(O,MLKEMSIdeal(DummyHS(IdealHSF.RF), S, KNS, DummyPRF1(IdealPRF1.RF), KPRF), As).main() @ &m : res] = - Pr[PRF_DEFS.IND(PRF_.PRF, D_PRF2(S, O, As)).main() @ &m : res]. -+ byequiv => //. - proc. - inline {2}2 . inline {2} 2. - wp; call(_: ={glob O}); 1: by sim. - swap {2} 1 4. - seq 4 4 : (={glob S, glob As, glob O,pk,sk,m0,m1} /\ b{1} = b1{2}). - + rnd;wp; call(_: ={glob O}); 1: by sim. - wp; call(_: ={glob O, glob S}); 1: by sim. - by inline *; auto => />; sim. - conseq (_: _ ==> ={c, glob O,glob S}); 1: smt(). - inline {1} 1. inline {2} 2. - swap {2} [2..3] -1; sp 2 2. - seq 1 2 : (#pre /\ r{1} = PRF_.PRF.k{2});1: by inline *; auto => />. - inline {1} 1; inline {2} 1. - sim 13 13. - seq 12 12 : (={that, m2, aT, glob S, glob O} /\ r0{1} = PRF_.PRF.k{2}); 1: by sim. - conseq => />. - inline {1} 1; inline {2} 1. - inline *. - wp;auto => />;sim. - unroll for {1} 8. - unroll for {2} 8. - do 3!( wp; conseq => />; sim). - wp;auto => />;sim. - unroll for {1} 6. - unroll for {2} 6. - do 3!( wp; conseq => />; sim). - by wp;auto => />;sim. - -have -> : Pr[CPA(O, MLKEMSIdeal(DummyHS(IdealHSF.RF), S, KNS, DummyPRF1(IdealPRF1.RF), DummyPRF2(RF)), As).main - () @ &m : res] = - Pr[PRF_DEFS.IND(RF, D_PRF2(S, O, As)).main() @ &m : res]. -+ byequiv => //. - proc. - inline {2} 2. inline {2} 2. - wp; call(_: ={glob O}); 1: by sim. - swap {2} 1 4. - seq 4 4 : (={glob S, glob As, glob O,pk,sk,m0,m1} /\ b{1} = b1{2}). - rnd. - wp; call(_: ={glob O}); 1: by sim. - wp; call(_: ={glob O, glob S}); 1: by sim. - by inline *; auto => />; sim. - conseq (_: _ ==> ={c, glob O,glob S}); 1: smt(). - inline {1} 1. inline {2} 2. - swap {2} [2..3] -1; sp 2 2. - seq 1 2 : (#pre /\ ={glob IdealPRF2.RF,r});1: by inline *; auto => />. - by inline *;sim => /> /#. -done. -qed. - - -end section. - -(*******************************************************************) -(* At this point we get some nice equivalences *) -(*******************************************************************) - -module MLKEMSI(S : Sampler, O : SMP_RO) = - MLKEMSIdeal(DummyHS(IdealHSF.RF),S,KNS,DummyPRF1(IdealPRF1.RF),DummyPRF2(IdealPRF2.RF), O). - -module (NTTSampler(S : Sampler) : Sampler) (O : SMP_RO) = { - proc sampleA(sd : W8.t Array32.t) : polymat = { - var a; - a <@ S(O).sampleA(sd); - return invnttm a; - } - - proc sampleAT(sd : W8.t Array32.t) : polymat = { - var a; - a <@ S(O).sampleAT(sd); - return invnttm a; - } - -}. - -phoare sem_decode1 a : [ EncDec.decode1 : arg = a ==> res = decode1 a ] = 1%r - by bypr => &m ->; have /= := (decode1_opsem &m (decode1 a) a). -phoare sem_encode1 a : [ EncDec.encode1 : arg = a ==> res = encode1 a ] = 1%r - by bypr => &m ->; have /= := (encode1_opsem &m (encode1 a) a). -phoare sem_encode4 a : [ EncDec.encode4 : arg = a ==> res = encode4 a ] = 1%r - by bypr => &m ->; have /= := (encode4_opsem &m (encode4 a) a). -phoare sem_decode4 a : [ EncDec.decode4 : arg = a ==> res = decode4 a ] = 1%r - by bypr => &m ->; have /= := (decode4_opsem &m (decode4 a) a). - -phoare sem_encode10_vec a : [ EncDec.encode10_vec : arg = a ==> res = encode10_vec_aux a ] = 1%r. -proof. - bypr => &m ->; have /= <- := (encode10_vec_aux_opsem &m (encode10_vec_aux a) a). - by byequiv (encode10_vec_aux) => //. -qed. - -phoare sem_decode10_vec a : [ EncDec.decode10_vec : arg = a ==> res = decode10_vec_aux a ] = 1%r. -proof. - bypr => &m ->; have /= <- := (decode10_vec_aux_opsem &m (decode10_vec_aux a) a). - by byequiv (decode10_vec_aux) => //. -qed. - -phoare sem_encode12_vec a : [ EncDec.encode12_vec : arg = a ==> res = encode12_vec_aux a ] = 1%r. -proof. - bypr => &m ->; have /= <- := (encode12_vec_aux_opsem &m (encode12_vec_aux a) a). - by byequiv (encode12_vec_aux) => //. -qed. - -phoare sem_decode12_vec a : [ EncDec.decode12_vec : arg = a ==> res = decode12_vec_aux a ] = 1%r. -proof. - bypr => &m ->; have /= <- := (decode12_vec_aux_opsem &m (decode12_vec_aux a) a). - by byequiv (decode12_vec_aux) => //. -qed. - -section. - -declare module O <: RO {-IdealHSF.RF,-RF,-IdealPRF1.RF}. -declare module S <: Sampler {-IdealHSF.RF,-RF, -O, -IdealPRF1.RF}. - -equiv keygen_eq : - MLWE_PKE(NTTSampler(S,O), O).kg ~ MLKEMSI(S,O).kg : - ={glob S, glob O} ==> ={res,glob S, glob O}. -proof. -proc. -inline {2} 4. -wp;ecall{2}(sem_encode12_vec (toipolyvec s0{2})). -wp;ecall{2}(sem_encode12_vec (toipolyvec t{2})). -swap {1} 4 -2. -swap {2} 2 -1. swap {2} 4 -2. swap {2} 10 -7. -seq 2 3 : (#pre /\ nttm _A{1} = a{2} /\ sd{1} = rho0{2}). -+ inline {1} 2; wp; call(_: ={glob O}); 1: by sim. - by auto => />; smt(nttmK). -swap {2} [2..3] 4. -seq 0 5 : #pre; 1: by inline *;auto. -wp; conseq (_: _ ==> ={e} /\ s{1} = s0{2} /\ sd{1} = rho0{2} /\ nttm _A{1} = a{2}); - last first. -+ conseq (_: true ==> ={e} /\ s{1} = s0{2}); 1: smt(). - inline {2} 3. -inline*. -transitivity {1} - { s <@ CBD2rnd.sample_vec_real(); - e <@ CBD2rnd.sample_vec_real(); } - ( true ==> ={e,s} ) - ( true ==> ={e} /\ s{1}=s0{2} ) => //. - transitivity {1} - { s <@ CBD2rnd.sample_vec_ideal(); - e <@ CBD2rnd.sample_vec_ideal(); } - ( true ==> ={e,s} ) - ( true ==> ={e,s} ) => //. - inline*; wp; rnd. - by wp; rnd; auto. - by symmetry; do 2! call CBD2rnd_vec_equiv; auto. - seq 0 7: (_N{2} = 0 /\ forall (x:W8.t), SmtMap.dom IdealPRF1.RF.m{2} x => W8.to_uint x < _N{2}). - by wp; auto => /> *; smt(SmtMap.mem_empty). - seq 1 2: (s{1}=noise1{2} /\ _N{2} = 3 /\ - forall (x:W8.t), SmtMap.dom IdealPRF1.RF.m{2} x => W8.to_uint x < _N{2}). - inline*; wp. - while (={i} /\ 0 <= i{2} <= kvec /\ _N{2}=i{2} /\ - (forall k, 0 <= k < i{2} => (v{1}.[k]=noise1{2}.[k])%PolyVec) /\ - forall (x:W8.t), SmtMap.dom IdealPRF1.RF.m{2} x => W8.to_uint x < _N{2}). - rcondt {2} 6. - move=> &m; wp; skip => &hr /> ??? Hm ?. - rewrite -implybF => H. - by move: (Hm _ H); rewrite implybF of_uintK /#. - wp; while (#[/:5,7:]pre /\ ={i0, bytes} /\ 0 <= i0{2} <= 128 /\ j{2} = i0{2}*2 /\ - (forall (x1 : W8.t), SmtMap.dom IdealPRF1.RF.m{2} x1 => to_uint x1 <= _N{2}) /\ - forall k, 0 <= k < j{2} => p0{1}.[k] = rr{2}.[k]). - wp; skip => /> &1&2 *; split; first smt(). - split; first smt(). - move=> k ?? /=. - case: (k= 2*i0{2}) => E1. - by rewrite set_neqiE 1..2:/# set_eqiE 1..2:/# set_neqiE 1..2:/# set_eqiE /#. - case: (k= 2*i0{2}+1) => E2. - by rewrite set_eqiE 1..2:/# set_eqiE /#. - by rewrite set_neqiE 1..2:/# set_neqiE 1..2:/# set_neqiE 1..2:/# set_neqiE /#. - wp; rnd; wp; skip => /> &1 &2; rewrite !setvE !getvE => ?????????; split. - split. - by rewrite SmtMap.get_set_sameE. - move=> x; case: (x=W8.of_int i{2}) => E. - by move=> _; rewrite E of_uintK modz_small /#. - rewrite SmtMap.domE SmtMap.get_set_neqE 1:// => H. - by apply StdOrder.IntOrder.ltrW; smt(). - move => p1 i0 p2 ?????? H; split; first smt(). - have EE: p1 = p2. - by apply Array256.tP => k kb; apply H; smt(). - split; last smt(). - move=> k HkL HkR; case: (k = i{2}) => E. - by rewrite /set E !offunvE /= /#. - by rewrite /set !offunvE /= 1..2:/# (eq_sym _ k) E /= /#. - auto => /> &1 &2; split; first smt(). - move=> v1 m i v2 => ??????; split; last smt(). - apply eq_vectorP => k kb;smt(setvE getvE). - wp; seq 2 2: (e{1}=noise2{2} /\ s{1}=noise1{2} /\ _N{2} = 6 /\ - forall (x:W8.t), SmtMap.dom IdealPRF1.RF.m{2} x => W8.to_uint x < _N{2}). - inline*; wp. - while (={i} /\ 0 <= i{2} <= kvec /\ _N{2}=3+i{2} /\ s{1}=noise1{2} /\ - (forall k, 0 <= k < i{2} => (v{1}.[k]=noise2{2}.[k])%PolyVec) /\ - forall (x:W8.t), SmtMap.dom IdealPRF1.RF.m{2} x => W8.to_uint x < _N{2}). - rcondt {2} 6. - move=> &m; wp; skip => &hr /> ??? Hm ?. - rewrite -implybF => H. - by move: (Hm _ H); rewrite implybF of_uintK /#. - wp; while (#[/:6,8:]pre /\ bytes{1}=bytes0{2} /\ i0{1}=i1{2} /\ 0 <= i1{2} <= 128 /\ j0{2} = i1{2}*2 /\ - (forall (x1 : W8.t), SmtMap.dom IdealPRF1.RF.m{2} x1 => to_uint x1 <= _N{2}) /\ - forall k, 0 <= k < j0{2} => p0{1}.[k] = rr0{2}.[k]). - wp; skip => /> &1&2 *; split; first smt(). - split; first smt(). - move=> k ?? /=. - case: (k= 2*i1{2}) => E1. - by rewrite set_neqiE 1..2:/# set_eqiE 1..2:/# set_neqiE 1..2:/# set_eqiE /#. - case: (k= 2*i1{2}+1) => E2. - by rewrite set_eqiE 1..2:/# set_eqiE /#. - by rewrite set_neqiE 1..2:/# set_neqiE 1..2:/# set_neqiE 1..2:/# set_neqiE /#. - wp; rnd; wp; skip => /> &1 &2; rewrite !getvE !setvE => ?????????; split. - split. - by rewrite SmtMap.get_set_sameE. - move=> x; case: (x=W8.of_int (3+i{2})) => E. - by move=> _; rewrite E of_uintK modz_small /#. - rewrite SmtMap.domE SmtMap.get_set_neqE 1:// => H. - by apply StdOrder.IntOrder.ltrW; smt(). - move => p1 i1 p2 ?????? H; split; first smt(). - have EE: p1 = p2. - by apply Array256.tP => k kb; apply H; smt(). - split; first smt(). - split; last smt(). - move=> k HkL HkR; case: (k = i{2}) => E. - by rewrite /set E !offunvE /= /#. - by rewrite /set !offunvE /= 1..2:/# (eq_sym _ k) E /= /#. - auto => /> &1 &2; split; first smt(). - move=> v1 m i v2 ??????; split; last smt(). - by apply eq_vectorP => k kb;smt(setvE getvE). - by auto. -auto => /> &1 &2 e s; rewrite /pk_encode /sk_encode /= polyvecD. -by rewrite -!polyvecD comm_nttv_add comm_nttv_mmul. -qed. - - -equiv enc_eq : - MLWE_PKE(NTTSampler(S,O), O).enc ~ MLKEMSI(S,O).enc : - ={arg,glob S, glob O} ==> ={res,glob S, glob O}. -proof. -proc. -inline {2} 2. -wp;ecall{2}(sem_encode4 (compress_poly 4 v{2})). -wp;ecall{2}(sem_encode10_vec (compress_polyvec 10 u{2})). -wp;ecall{2}(sem_decode1 (m0{2})). -swap {1} [2..4] -1. -swap {2} [7..8] -6. -swap {2} 14 -7. -swap {2} [6..7] -2. -seq 3 5 : (#pre /\ ={e1,e2} /\ r{1} = rv{2}). - inline*; simplify. - transitivity {1} - { r <@ CBD2rnd.sample_vec_real(); - e1 <@ CBD2rnd.sample_vec_real(); - e2 <@ CBD2rnd.sample_real(); } - ( ={pk,m,glob S,glob O} ==> ={pk,m,glob S,glob O,r,e1,e2} ) - ( ={pk,m,glob S,glob O} ==> ={pk,m,glob S,glob O,e1,e2} /\ r{1}=rv{2} ) => //; first smt(). - transitivity {1} - { r <@ CBD2rnd.sample_vec_ideal(); - e1 <@ CBD2rnd.sample_vec_ideal(); - e2 <@ CBD2rnd.sample_ideal(); } - ( ={pk,m,glob S,glob O} ==> ={pk,m,glob S,glob O,r,e1,e2} ) - ( ={pk,m,glob S,glob O} ==> ={pk,m,glob S,glob O,r,e1,e2} ) => //; first smt(). - inline*; wp; rnd. - wp; rnd. - by wp; rnd; auto. - symmetry; call CBD2rnd_equiv. - by do 2! call CBD2rnd_vec_equiv; auto. - seq 0 9: (={pk, m, glob S, glob O} /\ _N{2} = 0 /\ forall (x:W8.t), SmtMap.dom RF.m{2} x => W8.to_uint x < _N{2}). - by wp; auto => /> *; smt(SmtMap.mem_empty). - seq 1 2: (r{1}=noise1{2} /\ ={pk, m, glob S, glob O} /\ _N{2} = 3 /\ forall (x:W8.t), SmtMap.dom RF.m{2} x => W8.to_uint x < _N{2}). - inline*; wp. - while (={i, pk, m, glob S, glob O} /\ 0 <= i{2} <= kvec /\ _N{2}=i{2} /\ - (forall k, 0 <= k < i{2} => (v0{1}.[k]=noise1{2}.[k])%PolyVec) /\ - forall (x:W8.t), SmtMap.dom RF.m{2} x => W8.to_uint x < _N{2}). - rcondt {2} 6. - move=> &m; wp; skip => &hr /> ??? Hm ?. - rewrite -implybF => H. - by move: (Hm _ H); rewrite implybF of_uintK /#. - wp; while (#[/:9,11:]pre /\ ={i0, bytes} /\ 0 <= i0{2} <= 128 /\ j{2} = i0{2}*2 /\ - (forall (x1 : W8.t), SmtMap.dom RF.m{2} x1 => to_uint x1 <= _N{2}) /\ - forall k, 0 <= k < j{2} => p0{1}.[k] = rr{2}.[k]). - wp; skip => /> &1&2 *; split; first smt(). - split; first smt(). - move=> k ?? /=. - case: (k= 2*i0{2}) => E1. - by rewrite set_neqiE 1..2:/# set_eqiE 1..2:/# set_neqiE 1..2:/# set_eqiE /#. - case: (k= 2*i0{2}+1) => E2. - by rewrite set_eqiE 1..2:/# set_eqiE /#. - by rewrite set_neqiE 1..2:/# set_neqiE 1..2:/# set_neqiE 1..2:/# set_neqiE /#. - wp; rnd; wp; skip => /> &1 &2; rewrite !setvE !getvE => ?????????; split. - split. - by rewrite SmtMap.get_set_sameE. - move=> x; case: (x=W8.of_int i{2}) => E. - by move=> _; rewrite E of_uintK modz_small /#. - rewrite SmtMap.domE SmtMap.get_set_neqE 1:// => H. - by apply StdOrder.IntOrder.ltrW; smt(). - move => p1 i0 p2 ?????? H; split; first smt(). - have EE: p1 = p2. - by apply Array256.tP => k kb; apply H; smt(). - split; last smt(). - move=> k HkL HkR; case: (k = i{2}) => E. - by rewrite /set E !offunvE /= /#. - by rewrite /set !offunvE /= 1..2:/# (eq_sym _ k) E /= /#. - auto => /> &1 &2; split; first smt(). - move=> v1 m i v2 ??????; split; last smt(). - by apply eq_vectorP => k kb;smt(setvE getvE). - seq 1 2: (e1{1}=noise2{2} /\ r{1}=noise1{2} /\ ={pk, m, glob S, glob O} /\ _N{2} = 6 /\ forall (x:W8.t), SmtMap.dom RF.m{2} x => W8.to_uint x < _N{2}). - inline*; wp. - while (={i, pk, m, glob S, glob O} /\ 0 <= i{2} <= kvec /\ _N{2}=3+i{2} /\ r{1}=noise1{2} /\ - (forall k, 0 <= k < i{2} => (v0{1}.[k]=noise2{2}.[k])%PolyVec) /\ - forall (x:W8.t), SmtMap.dom RF.m{2} x => W8.to_uint x < _N{2}). - rcondt {2} 6. - move=> &m; wp; skip => &hr /> ??? Hm ?. - rewrite -implybF => H. - by move: (Hm _ H); rewrite implybF of_uintK /#. - wp; while (#[/:10,12:]pre /\ bytes{1}=bytes0{2} /\ i0{1}=i1{2} /\ 0 <= i1{2} <= 128 /\ j0{2} = i1{2}*2 /\ - (forall (x1 : W8.t), SmtMap.dom RF.m{2} x1 => to_uint x1 <= _N{2}) /\ - forall k, 0 <= k < j0{2} => p0{1}.[k] = rr0{2}.[k]). - wp; skip => /> &1&2 *; split; first smt(). - split; first smt(). - move=> k ?? /=. - case: (k= 2*i1{2}) => E1. - by rewrite set_neqiE 1..2:/# set_eqiE 1..2:/# set_neqiE 1..2:/# set_eqiE /#. - case: (k= 2*i1{2}+1) => E2. - by rewrite set_eqiE 1..2:/# set_eqiE /#. - by rewrite set_neqiE 1..2:/# set_neqiE 1..2:/# set_neqiE 1..2:/# set_neqiE /#. - wp; rnd; wp; skip => /> &1 &2; rewrite !setvE !getvE => ?????????; split. - split. - by rewrite SmtMap.get_set_sameE. - move=> x; case: (x=W8.of_int (3+i{2})) => E. - by move=> _; rewrite E of_uintK modz_small /#. - rewrite SmtMap.domE SmtMap.get_set_neqE 1:// => H. - by apply StdOrder.IntOrder.ltrW; smt(). - move => p1 i1 p2 ?????? H; split; first smt(). - have EE: p1 = p2. - by apply Array256.tP => k kb; apply H; smt(). - split; first smt(). - split; last smt(). - move=> k HkL HkR; case: (k = i{2}) => E. - by rewrite /set E !offunvE /= /#. - by rewrite /set !offunvE /= 1..2:/# (eq_sym _ k) E /= /#. - auto => /> &1 &2; split; first smt(). - move=> v1 m i v2 ??????; split; last smt(). - by apply eq_vectorP => k kb;smt(setvE getvE). - seq 1 12: (e2{1}=e20{2} /\ e1{1}=noise2{2} /\ r{1}=noise1{2} /\ ={pk, m, glob S, glob O}). - inline*; wp. - rcondt {2} 6. - move=> &m; wp; skip => &hr /> Hm. - rewrite -implybF => H. - by move: (Hm _ H); rewrite implybF of_uintK /#. - while (#[/:-2]pre /\ bytes{1}=bytes1{2} /\ i{1}=i2{2} /\ 0 <= i2{2} <= 128 /\ j1{2} = i2{2}*2 /\ - forall k, 0 <= k < j1{2} => p{1}.[k] = rr1{2}.[k]). - wp; skip => /> &1&2 *; split; first smt(). - split; first smt(). - move=> k ?? /=. - case: (k= 2*i2{2}) => E1. - by rewrite set_neqiE 1..2:/# set_eqiE 1..2:/# set_neqiE 1..2:/# set_eqiE /#. - case: (k= 2*i2{2}+1) => E2. - by rewrite set_eqiE 1..2:/# set_eqiE /#. - by rewrite set_neqiE 1..2:/# set_neqiE 1..2:/# set_neqiE 1..2:/# set_neqiE /#. - wp; rnd; wp; skip => /> &1 &2 ????; split. - by rewrite SmtMap.get_set_eqE //=. - move => p1 i1 p2 ????? H. - by apply Array256.tP => k kb; apply H; smt(). - by auto. -conseq />; 1: smt(). -inline {1} 2. -swap {2} 3 -2. swap {2} 6 -3. swap {2} 9 -5. -seq 4 4 : (#pre /\ tv{2} = pk{1}.`1 /\ t{1} = (pk_decode pk{2}).`1 /\ nttm _A{1} = aT{2}). - wp; call(_: ={glob O}); 1: by sim. - by auto => />; smt(nttmK). -wp;ecall{2}(sem_decode12_vec (tv{2})). -auto => /> &1 &2; rewrite /pk_decode /c_encode /m_encode /=. -split; 1: by rewrite -!polyvecD -comm_nttv_mmul invnttvK. -congr. congr. congr. congr. -by rewrite comm_ntt_dotp sem_decode12_vec_corr. -by rewrite sem_decode1_corr. -qed. - -equiv dec_eq : - MLWE_PKE(NTTSampler(S,O), O).dec ~ MLKEMSI(S,O).dec : - ={arg,glob S, glob O} ==> ={res,glob S, glob O}. -proof. -proc. -ecall{2} (sem_encode1 (compress_poly 1 mp{2})). -wp;ecall{2} (sem_decode12_vec (sk{2})). -wp;ecall{2} (sem_decode4 (c2{2})). -wp;ecall{2} (sem_decode10_vec (c1{2})). -auto => /> &2. -rewrite /m_decode /sk_decode /c_decode /=. -congr. congr. congr. -by rewrite sem_decode4_corr. -by rewrite comm_ntt_dotp sem_decode10_vec_corr sem_decode12_vec_corr. -qed. - -end section. - -(***************************************) - -section. - -declare module O <: RO {-IdealHSF.RF,-IdealPRF1.RF,-RF,-KPRF, -B1ROM, -B2ROM}. -declare module S <: Sampler {-IdealHSF.RF,-IdealPRF1.RF, -RF, -KPRF, -O, -B1ROM, -B2ROM}. -declare module As <: MLKEMPKE.Adversary {-IdealPRF1.RF,-IdealHSF.RF,-RF,-O, -S, -KPRF, -B1ROM, -B2ROM}. - -lemma security_any_sampler &m : - islossless O.init => - islossless O.get => - (forall (O0 <: SMP_RO), islossless O0.get => islossless S(O0).sampleA) => - (forall (O0 <: SMP_RO), islossless O0.get => islossless S(O0).sampleAT) => - (forall (O0 <: SMP_RO), islossless O0.get => islossless As(O0).guess) => - (forall (O0 <: SMP_RO), islossless O0.get => islossless As(O0).choose) => - Pr[ MLKEMPKE.CPA(O,MLKEMSI(S),As).main() @ &m : res] - 1%r/2%r = - Pr[MLWE_SMP(B1ROM(As, NTTSampler(S)), NTTSampler(S), O).main(false, false) @ &m : res] - - Pr[MLWE_SMP(B1ROM(As, NTTSampler(S)), NTTSampler(S), O).main(false, true) @ &m : res] + - Pr[MLWE_SMP(B2ROM(As, NTTSampler(S)), NTTSampler(S), O).main(true, false) @ &m : res] - - Pr[MLWE_SMP(B2ROM(As, NTTSampler(S)), NTTSampler(S), O).main(true, true) @ &m : res]. -move => H H0 H1 H2 H3 H4. -have <- : - Pr[ CPA(O,MLWE_PKE(NTTSampler(S,O)),As).main() @ &m : res] = - Pr[ MLKEMPKE.CPA(O,MLKEMSI(S),As).main() @ &m : res]; last first. -+ have <- := (main_theorem_s O (NTTSampler(S)) As &m H H0 _ _ _ _). - + by move => O0 HH; islossless; apply (H1 O0 HH). - + by move => O0 HH; islossless; apply (H2 O0 HH). - + by move => O0 HH; apply (H3 O0 HH). - + by move => O0 HH; apply (H4 O0 HH). - by congr => //=;byequiv => //;sim. -byequiv => //. -proc. -wp;call(_: ={glob O}); 1: by sim. -call (enc_eq O S). -rnd. -call(_: ={glob O}); 1: by sim. -call (keygen_eq O S). -by conseq />; sim. -qed. - -end section. - -section. - -declare module O <: RO {-IdealPRF2.RF,-PRF_.PRF,-HSF.PRF,-IdealHSF.RF,-IdealPRF1.RF,-RF,-KPRF, -B1ROM, -B2ROM}. -declare module XOF <: XOF_RO_t {-IdealPRF2.RF,-PRF_.PRF,-HSF.PRF,-IdealHSF.RF,-IdealPRF1.RF,-RF,-KPRF, -O, -B1ROM, -B2ROM}. -declare module As <: MLKEMPKE.Adversary {-IdealPRF2.RF,-PRF_.PRF,-HSF.PRF,-IdealHSF.RF,-IdealPRF1.RF,-RF,-O, -XOF, -KPRF, -B1ROM, -B2ROM}. - -(* This theorem yields security for any sampler and we can use - it to obtain security for the implementation in the standard - model under the assumption that MLWE is secure when the matrix - is sampled how the implementation does it. - We can't get any formal correctness bounds though, since - the distribution of the matrix is not well defined. *) -lemma security_spec &m : - islossless O.init => - islossless O.get => - (forall (O0 <: SMP_RO), islossless O0.get => islossless XOF(O0).init) => - (forall (O0 <: SMP_RO), islossless O0.get => islossless XOF(O0).next_bytes) => - - (forall (O0 <: SMP_RO) (XOF0 <: XOF_RO_t), - islossless O0.get => - islossless XOF0(O0).next_bytes => - islossless Parse(XOF0(O0)).sample) => - - (forall (O0 <: SMP_RO), islossless O0.get => islossless As(O0).guess) => - (forall (O0 <: SMP_RO), islossless O0.get => islossless As(O0).choose) => - - Pr[ MLKEMPKE.CPA(O,MLKEMS(KHS,KSampler(XOF),KNS,KPRF,KPRF),As).main() @ &m : res] - 1%r/2%r = - (Pr [ HS_DEFS.IND(HSF.PRF,D_ES(KSampler(XOF),O,As)).main() @ &m : res ] - - Pr [ HS_DEFS.IND(IdealHSF.RF,D_ES(KSampler(XOF),O,As)).main() @ &m : res ]) + - (Pr [ PRF_DEFS.IND(PRF_.PRF,D_PRF1(KSampler(XOF),O,As)).main() @ &m : res ] - - Pr [ PRF_DEFS.IND(IdealPRF1.RF,D_PRF1(KSampler(XOF),O,As)).main() @ &m : res ]) + - (Pr [ PRF_DEFS.IND(PRF_.PRF,D_PRF2(KSampler(XOF),O,As)).main() @ &m : res ] - - Pr [ PRF_DEFS.IND(IdealPRF2.RF,D_PRF2(KSampler(XOF),O,As)).main() @ &m : res ]) + - (Pr[MLWE_SMP(B1ROM(As, NTTSampler(KSampler(XOF))), NTTSampler(KSampler(XOF)), O).main(false, false) @ &m : res] - - Pr[MLWE_SMP(B1ROM(As, NTTSampler(KSampler(XOF))), NTTSampler(KSampler(XOF)), O).main(false, true) @ &m : res] + - Pr[MLWE_SMP(B2ROM(As, NTTSampler(KSampler(XOF))), NTTSampler(KSampler(XOF)), O).main(true, false) @ &m : res] - - Pr[MLWE_SMP(B2ROM(As, NTTSampler(KSampler(XOF))), NTTSampler(KSampler(XOF)), O).main(true, true) @ &m : res]). -move => Oill Ooll XOF_init_ll XOF_next_ll Parse_ll Asgll Ascll. - -(* - WE LOST CONNECTION TO THE SPEC IN THE ROM BECAUSE THE TYPE OF THE XOF DOES NOT TAKE AN RO. - have <- : Pr[ MLKEMPKE.CPA(O,MLKEMS(KHS,KSampler(XOF),KNS,KPRF,KPRF),As).main() @ &m : res]= - Pr[ MLKEMPKE.CPA(O,MLKEM(KHS,XOF,KPRF),As).main() @ &m : res]. -+ byequiv => //. - proc. - wp; call(_: ={glob O, glob XOF}); 1: by sim. - call (enc_sampler_enc O XOF). - rnd. - wp; call(_: ={glob O, glob XOF}); 1: by sim. - call (kg_sampler_kg O XOF). - by call(_: true); auto => />. *) - -have <- := (security_any_sampler O (KSampler(XOF)) As &m Oill Ooll _ _ Asgll Ascll). - + by move => O O_ll; apply (KSamplerA_ll XOF O O_ll XOF_init_ll XOF_next_ll Parse_ll). - by move => O O_ll; apply (KSamplerAT_ll XOF O O_ll XOF_init_ll XOF_next_ll Parse_ll). - -have <- := (ESHop O (KSampler(XOF)) As). -have <- := (PRFHop1 O (KSampler(XOF)) As). -have <- := (PRFHop2 O (KSampler(XOF)) As). -have <- := (MLKEMS_MLKEMIdeal O (KSampler(XOF)) As). -have <-: Pr[CPA(O, MLKEMSI(KSampler(XOF)), As).main() @ &m : res] = - Pr[CPA(O, MLKEMSIdeal(DummyHS(IdealHSF.RF), KSampler(XOF), KNS, DummyPRF1(IdealPRF1.RF), DummyPRF2(RF)), As).main - () @ &m : res] by byequiv => //=; proc; sim. -by ring. -qed. - -end section. - -import MLWE_.SMP_vs_ROM_IND. - -import MLWE_.MLWE_ROM.MLWE_vs_MLWE_ROM. -import MLWE_.MLWE_ROM. -import MLWE_. - -(* This theorem yields security for any sampler that can be - proved indifferentiable from the RO that samples uniform - matrices down to standard MLWE. *) - -section. - -declare module As <: MLKEMPKE.Adversary {-IdealPRF1.RF,-IdealHSF.RF,-RF,-LRO,-RO_SMP.LRO, -RO_H.RO, -RO_H.FRO, -RO_H.LRO,-KPRF, -B1ROM, -B2ROM, -B,-Bt, -BS, -D}. -declare module S <: Sampler {-IdealPRF1.RF,-IdealHSF.RF,-RF,-As, -B1ROM, -B2ROM, -LRO, -RO_SMP.LRO, -RO_H.RO, -RO_H.FRO, -RO_H.LRO, -B,-Bt, -BS, -D}. -declare module Sim <: Simulator_t {-IdealPRF1.RF,-IdealHSF.RF,-S,-As,-B1ROM, -B2ROM, -LRO, -RO_SMP.LRO, -RO_H.RO, -RO_H.FRO, -RO_H.LRO, -B,-Bt, -BS, -D}. - - -lemma security_any_sampler_indiff &m epsilon : - 0%r <= epsilon => - (forall (x : in_t), is_lossless (dout x)) => - (forall (O <: SMP_RO), islossless O.get => islossless S(O).sampleA) => - (forall (O <: SMP_RO), islossless O.get => islossless S(O).sampleAT) => - (forall (O <: SMP_RO), islossless O.get => islossless As(O).guess) => - (forall (O <: SMP_RO), islossless O.get => islossless As(O).choose) => - - (forall tr b (D0 <: Distinguisher_t {-S,-Sim,-RO_H.LRO, -RO_SMP.RO, -RO_SMP.LRO, -NTTSampler(S)}), - `| Pr[ WIndfReal(D0,NTTSampler(S),RO_SMP.LRO).main(tr,b) @ &m : res] - - Pr[ WIndfIdeal(D0,Sim,RO_H.LRO).main(tr,b) @ &m : res] | <= epsilon) => - - `| Pr[ MLKEMPKE.CPA(LRO,MLKEMSI(S),As).main() @ &m : res] - 1%r/2%r | <= - `| Pr[MLWE(B(BS(B1ROM(As,NTTSampler(S)),Sim),RO_H.LRO)).main(false) @ &m : res] - - Pr[MLWE(B(BS(B1ROM(As,NTTSampler(S)),Sim),RO_H.LRO)).main(true) @ &m : res] | + - `| Pr[MLWE(Bt(BS(B2ROM(As,NTTSampler(S)),Sim),RO_H.LRO)).main(false) @ &m : res] - - Pr[MLWE(Bt(BS(B2ROM(As,NTTSampler(S)),Sim),RO_H.LRO)).main(true) @ &m : res] | + 4%r * epsilon. -move => eps_ge0 H H0 H1 H2 H3 H4. -have <- : - Pr[ CPA(LRO,MLWE_PKE(NTTSampler(S,LRO)),As).main() @ &m : res] = - Pr[ MLKEMPKE.CPA(LRO,MLKEMSI(S),As).main() @ &m : res]. -+ byequiv => //. - proc. - wp;call(_: ={glob LRO}); 1: by sim. - call (enc_eq LRO S). - rnd. - call(_: ={glob LRO}); 1: by sim. - call (keygen_eq LRO S). - by conseq />; sim. - -move : (main_theorem_ref As (NTTSampler(S)) Sim &m epsilon eps_ge0 H _ _ H2 H3 H4). -+ by move => O0 HH; islossless; apply (H0 O0 HH). -by move => O0 HH; islossless; apply (H1 O0 HH). -have -> : Pr[PKE_ROM.CPA(RO_SMP.LRO,MLWE_PKE(NTTSampler(S, RO_SMP.LRO)), As).main() @ &m : res] = Pr[CPA(LRO, MLWE_PKE(NTTSampler(S, LRO)), As).main() @ &m : res]; last by auto. - + byequiv => //;proc;inline *. - wp; call(_: RO_SMP.RO.m{1} = RO.m{2}); 1: by proc;inline *; sim;auto. - wp; call(_: RO_SMP.RO.m{1} = RO.m{2}); 1: by proc;inline *; sim;auto. - auto;call(_: RO_SMP.RO.m{1} = RO.m{2}); 1: by proc;inline *; sim;auto. - wp; call(_: RO_SMP.RO.m{1} = RO.m{2}); 1: by proc;inline *; sim;auto. - by auto => />. -qed. - -end section. - -(* The following theorem is the strongest claim we can make - about the MLKEM Spec. It comes with two assumptions on the - matrix sampling procedure: - 1) that it converges (losslessness of Parse) - there is some hope of proving this when XOF is a ROM. - 2) that it is indifferentiable (explainable) wrt to the - random oracle that simply samples uniform matrices. - this basically means that there must exist a simulator - that can produce a plausible XOF output for any - given Matrix. Easy to claim in paper when XOF is a ROM: - just rejection-sample a different matrix and program - the coefficients with the ones you want to explain. - No clue how to prove this in EC though. - But note that, in this case we get security down - to MLWE. *) - -section. - -declare module As <: MLKEMPKE.Adversary {-IdealPRF2.RF,-PRF_.PRF,-HSF.PRF,-IdealHSF.RF,-IdealPRF1.RF,-LRO, -RO_H.RO, -RO_H.FRO, -RO_H.LRO, -RO_SMP.LRO, -KPRF, -B1ROM, -B2ROM, -B,-Bt, -BS, -D}. -declare module XOF <: XOF_RO_t {-IdealPRF2.RF,-PRF_.PRF,-HSF.PRF,-IdealHSF.RF,-IdealPRF1.RF,-As, -B1ROM, -B2ROM, -LRO, -RO_H.RO, -RO_H.FRO, -RO_H.LRO, -RO_SMP.LRO, -B,-Bt, -BS, -D}. -declare module Sim <: Simulator_t {-IdealPRF2.RF,-PRF_.PRF,-HSF.PRF,-IdealHSF.RF,-IdealPRF1.RF,-XOF,-As,-B1ROM, -B2ROM, -LRO, -RO_H.RO, -RO_H.FRO, -RO_H.LRO, -RO_SMP.LRO, -B,-Bt, -BS, -D}. - -lemma security_spec_indiff &m epsilon: - 0%r <= epsilon => - - (forall (x : in_t), is_lossless (dout x)) => - - (forall (O0 <: SMP_RO), islossless O0.get => islossless XOF(O0).init) => - (forall (O0 <: SMP_RO), islossless O0.get => islossless XOF(O0).next_bytes) => - - (forall (O0 <: SMP_RO) (XOF0 <: XOF_RO_t), - islossless O0.get => - islossless XOF0(O0).next_bytes => - islossless Parse(XOF0(O0)).sample) => - - (forall (O0 <: SMP_RO), islossless O0.get => islossless As(O0).guess) => - (forall (O0 <: SMP_RO), islossless O0.get => islossless As(O0).choose) => - - (forall tr b (D0 <: Distinguisher_t {-KSampler(XOF),-Sim, -RO_H.LRO, -RO_SMP.RO, -RO_SMP.LRO, -KSampler(XOF)}), - `| Pr[ WIndfReal(D0,NTTSampler(KSampler(XOF)),RO_SMP.LRO).main(tr,b) @ &m : res] - - Pr[ WIndfIdeal(D0,Sim,RO_H.LRO).main(tr,b) @ &m : res] | <= epsilon) => - - - `| Pr[ MLKEMPKE.CPA(LRO,MLKEMS(KHS,KSampler(XOF),KNS,KPRF,KPRF),As).main() @ &m : res] - 1%r/2%r | <= - `| Pr [ HS_DEFS.IND(HSF.PRF,D_ES(KSampler(XOF),LRO,As)).main() @ &m : res ] - - Pr [ HS_DEFS.IND(IdealHSF.RF,D_ES(KSampler(XOF),LRO,As)).main() @ &m : res ] | + - `| Pr [ PRF_DEFS.IND(PRF_.PRF,D_PRF1(KSampler(XOF),LRO,As)).main() @ &m : res ] - - Pr [ PRF_DEFS.IND(IdealPRF1.RF,D_PRF1(KSampler(XOF),LRO,As)).main() @ &m : res ] | + - `| Pr [ PRF_DEFS.IND(PRF_.PRF,D_PRF2(KSampler(XOF),LRO,As)).main() @ &m : res ] - - Pr [ PRF_DEFS.IND(IdealPRF2.RF,D_PRF2(KSampler(XOF),LRO,As)).main() @ &m : res ] | + - `| Pr[MLWE(B(BS(B1ROM(As,NTTSampler(KSampler(XOF))),Sim),RO_H.LRO)).main(false) @ &m : res] - - Pr[MLWE(B(BS(B1ROM(As,NTTSampler(KSampler(XOF))),Sim),RO_H.LRO)).main(true) @ &m : res] | + - `| Pr[MLWE(Bt(BS(B2ROM(As,NTTSampler(KSampler(XOF))),Sim),RO_H.LRO)).main(false) @ &m : res]- - Pr[MLWE(Bt(BS(B2ROM(As,NTTSampler(KSampler(XOF))),Sim),RO_H.LRO)).main(true) @ &m : res] | + 4%r * epsilon. - -move => eps_ge0 Oill XOF_init_ll XOF_next_ll Parse_ll Asgll Ascll ind. -(* - WE LOST CONNECTION TO THE SPEC IN THE ROM BECAUSE THE TYPE OF THE XOF DOES NOT TAKE AN RO. - -have <- : Pr[ MLKEMPKE.CPA(LRO,MLKEMS(KHS,KSampler(XOF),KNS,KPRF,KPRF),As).main() @ &m : res]= - Pr[ MLKEMPKE.CPA(LRO,MLKEM(KHS,XOF,KPRF),As).main() @ &m : res]. -+ byequiv => //. - proc. - wp; call(_: ={glob LRO, glob XOF}); 1: by sim. - call (enc_sampler_enc LRO XOF). - rnd. - wp; call(_: ={glob LRO, glob XOF}); 1: by sim. - call (kg_sampler_kg LRO XOF). - by inline *; auto. -*) -+ have := (security_any_sampler_indiff As (KSampler(XOF)) Sim &m epsilon eps_ge0 Oill _ _ Asgll Ascll ind). - + by move => O O_ll; apply (KSamplerA_ll XOF O O_ll XOF_init_ll XOF_next_ll Parse_ll). - by move => O O_ll; apply (KSamplerAT_ll XOF O O_ll XOF_init_ll XOF_next_ll Parse_ll). - -have <- := (ESHop LRO (KSampler(XOF)) As). -have <- := (PRFHop1 LRO (KSampler(XOF)) As). -have <- := (PRFHop2 LRO (KSampler(XOF)) As). -have <- := (MLKEMS_MLKEMIdeal LRO (KSampler(XOF)) As). -have <-: Pr[CPA(LRO, MLKEMSI(KSampler(XOF)), As).main() @ &m : res] = - Pr[CPA(LRO, MLKEMSIdeal(DummyHS(IdealHSF.RF), KSampler(XOF), KNS, DummyPRF1(IdealPRF1.RF), DummyPRF2(RF)), As).main - () @ &m : res] by byequiv => //=; proc; sim. -by smt(). -qed. - - -end section. - -(* The correctness bounds can only be computed when matrices are - uniform, so we jump directly to results where one can assume - that the sampler is indifferentiable from the nice RO. - Note here we are working with the scheme with ideal HS and - PRFs. *) - -section. - -declare module A <: CORR_ADV {-IdealPRF1.RF,-IdealHSF.RF,-LRO,-RO_H.LRO, -RO_SMP.RO, -RO_SMP.LRO, -CB}. -declare module S <: Sampler {-IdealPRF1.RF,-IdealHSF.RF,-A,-LRO,-RO_H.LRO, -RO_SMP.RO, -RO_SMP.LRO, -CB}. -declare module Sim <: Simulator_t {-IdealPRF1.RF,-IdealHSF.RF,-S, -A,-LRO,-RO_H.LRO, -RO_SMP.RO, -RO_SMP.LRO, -CB}. - - -lemma correctness_any_sampler &m cu_bound failprob1 failprob2 epsilon : - (glob Bcb2){m} = cu_bound => - - (forall (O <: Ideal_RO), islossless O.get => islossless Sim(O).init) => - (forall (O <: Ideal_RO), islossless O.get => islossless Sim(O).get) => - (forall (O <: SMP_RO), islossless O.get => islossless A(O).find) => - - (forall trb (D0 <: Distinguisher_t {-S,-RO_H.RO, -RO_H.LRO, -RO_SMP.RO, -RO_SMP.LRO, -NTTSampler(S),-Sim}), - `| Pr[ WIndfReal(D0,NTTSampler(S),RO_SMP.LRO).main(trb) @ &m : res] - - Pr[ WIndfIdeal(D0,Sim,RO_H.LRO).main(trb) @ &m : res] | <= epsilon) => - - Pr[ CB1.main(cu_bound, cv_bound_max) @ &m : res] <= failprob1 => - Pr[ CB2.main(cu_bound) @ &m : res] <= failprob2 => - - Pr[ MLKEMPKE.Correctness_Adv(LRO,MLKEMSI(S),A).main() @ &m : res] <= - `|Pr[MLWE(Bcb2).main(false) @ &m : res] - - Pr[MLWE(Bcb2).main(true) @ &m : res]| + failprob1 + failprob2 + epsilon. -move => initmem Sim_i_ll Sim_h_ll A_ll ind fp1 fp2. - -have <- : -Pr[PKE_ROM.Correctness_Adv(RO_SMP.LRO,MLWE_PKE(NTTSampler(S, RO_SMP.LRO)), A).main() @ &m : res] = -Pr[Correctness_Adv(LRO,MLKEMSI(S), A).main() @ &m : res]; last by apply (correctness_max A (NTTSampler(S)) Sim &m cu_bound epsilon failprob1 failprob2 initmem Sim_i_ll Sim_h_ll A_ll ind fp1 fp2). -have <-: Pr[PKE_ROM.Correctness_Adv(LRO, MLWE_PKE(NTTSampler(S,LRO)), A).main() @ &m : res] = Pr[PKE_ROM.Correctness_Adv(RO_SMP.LRO,MLWE_PKE(NTTSampler(S, RO_SMP.LRO)), A).main() @ &m : res]. -+ byequiv => //; proc;inline *. - wp; call(_: RO_SMP.RO.m{2} = RO.m{1}); 1: by proc;inline *; sim;auto. - auto => />; 1: by smt(). - wp; call(_: RO_SMP.RO.m{2} = RO.m{1}); 1: by proc;inline *; sim;auto. - wp; call(_: RO_SMP.RO.m{2} = RO.m{1}); 1: by proc;inline *; sim;auto. - by auto => />. -byequiv => //. -proc. -wp;call (dec_eq LRO S). -wp;call (enc_eq LRO S). -call(_: ={glob S, glob LRO}); 1: by sim. -wp;call (keygen_eq LRO S). -by conseq => />; sim. -qed. - -end section. - -(* For the full spec we get the following result, which does - not require losslessness, but does require indifferentiability. - However we must first game hop to the ideal scheme. *) - -module (DC_ES(S : Sampler, O : RO, Ac : MLKEMPKE.CORR_ADV) : HS_DEFS.Distinguisher) (F : HS_DEFS.PRF_Oracles) = { - - proc distinguish() : bool = { - var b; - b <@ MLKEMPKE.Correctness_Adv(O,MLKEMS(DummyHS_D(F),S,KNS,KPRF,KPRF),Ac).main(); - return b; - } -}. - -module (DC_PRF1(S : Sampler, O : RO, Ac : MLKEMPKE.CORR_ADV) : PRF_DEFS.Distinguisher) (F : PRF_DEFS.PRF_Oracles) = { - - proc distinguish() : bool = { - var b; - b <@ MLKEMPKE.Correctness_Adv(O,MLKEMSIdeal(DummyHS(IdealHSF.RF),S,KNS,DummyPRF1(F),KPRF),Ac).main(); - return b; - } -}. - -module (DC_PRF2(S : Sampler, O : RO, Ac : MLKEMPKE.CORR_ADV) : PRF_DEFS.Distinguisher) (F : PRF_DEFS.PRF_Oracles) = { - - proc distinguish() : bool = { - var b; - b <@ MLKEMPKE.Correctness_Adv(O,MLKEMSIdeal(DummyHS(IdealHSF.RF),S,KNS,DummyPRF1(IdealPRF1.RF),DummyPRF2D(F)),Ac).main(); - return b; - } -}. - -section. - -declare module O <: RO {-IdealPRF2.RF, -IdealPRF1.RF, -PRF_.PRF, -IdealHSF.RF, -KPRF, -B1ROM, -B2ROM, -HSF.PRF}. -declare module S <: Sampler {-IdealPRF2.RF, -IdealPRF1.RF, -PRF_.PRF, -IdealHSF.RF, -KPRF, -O, -B1ROM, -B2ROM}. -declare module Ac <: MLKEMPKE.CORR_ADV {-IdealPRF2.RF, -IdealPRF1.RF, -PRF_.PRF, -IdealHSF.RF, -HSF.PRF, -O, -S, -KPRF, -B1ROM, -B2ROM}. - -lemma ESHopC &m : - Pr [ MLKEMPKE.Correctness_Adv(O,MLKEMS(KHS,S,KNS,KPRF,KPRF),Ac).main() @ &m : res] - - Pr [ MLKEMPKE.Correctness_Adv(O,MLKEMS(DummyHS(IdealHSF.RF),S,KNS,KPRF,KPRF),Ac).main() @ &m : res] = - Pr [ HS_DEFS.IND(HSF.PRF,DC_ES(S,O,Ac)).main() @ &m : res ] - - Pr [ HS_DEFS.IND(IdealHSF.RF,DC_ES(S,O,Ac)).main() @ &m : res ]. -proof. -have -> : Pr[Correctness_Adv(O,MLKEMS(KHS, S, KNS, KPRF,KPRF), Ac).main() @ &m : res] = - Pr[HS_DEFS.IND(HSF.PRF, DC_ES(S, O, Ac)).main() @ &m : res] . -+ byequiv => //. - proc. - inline {2} 2. inline {2} 2. - wp; call(_: ={glob O, glob S}); 1: by sim. - wp; call(_: ={glob O, glob S}); 1: by sim. - wp; call(_: ={glob O}); 1: by sim. - conseq (_: _ ==> ={pk,sk, glob O,glob S}); 1: smt(). - swap {2} 2 -1. - seq 1 1 : (={glob Ac, glob S, glob O}); 1: by sim. - inline {1} 1. inline {1} 1. inline {2} 2. inline {2} 1. - seq 3 3 : (#pre /\ ={rho,sig}); 1: by inline *; sim. - by sim. - -+ have -> : - Pr[Correctness_Adv(O,MLKEMS(DummyHS(IdealHSF.RF), S, KNS, KPRF, KPRF), Ac).main() @ &m : res] = - Pr[HS_DEFS.IND(IdealHSF.RF, DC_ES(S, O, Ac)).main() @ &m : res]. -+ byequiv => //. - proc. - inline {2} 2. inline {2} 2. - wp; call(_: ={glob O, glob S}); 1: by sim. - wp; call(_: ={glob O, glob S}); 1: by sim. - wp; call(_: ={glob O}); 1: by sim. - conseq (_: _ ==> ={pk,sk, glob O,glob S}); 1: smt(). - swap {2} 2 -1. - seq 1 1 : (={glob Ac, glob S, glob O}); 1: by sim. - inline {1} 1. inline {1} 1. inline {2} 2. inline {2} 1. - seq 3 3 : (#pre /\ ={rho,sig}); 1: by inline *; sim. - by sim. -done. -qed. - -lemma MLKEMS_MLKEMIdeal_C &m : - Pr [ Correctness_Adv(O,MLKEMS(DummyHS(IdealHSF.RF),S,KNS,KPRF,KPRF),Ac).main() @ &m : res] = - Pr [ Correctness_Adv(O,MLKEMSIdeal(DummyHS(IdealHSF.RF),S,KNS,KPRF,KPRF),Ac).main() @ &m : res]. -proof. -byequiv => //. -proc. - wp; call(_: ={glob O, glob S}); 1: by sim. - wp; call(_: ={glob O, glob S}); 1: by sim. - wp; call(_: ={glob O}); 1: by sim. - conseq (_: _ ==> ={pk,sk, glob O,glob S}); 1: smt(). - seq 1 1 : (={glob Ac, glob S, glob O}); 1: by sim. - inline {1} 1. inline {2} 1. - wp; call(_: ={glob O, glob S}); 1: by sim. - conseq => />. - inline *. - rcondt{1} 6; 1: by move => *; auto => />;smt(@SmtMap). - seq {1} 5 2 : (true); 1: by auto. - wp. - conseq (_: _ ==> rho{2} = Array32.init (fun i => (r{1}.`1.[i])%PolyVec) /\ - k{2} = Array32.init (fun i => (r{1}.`2.[i])%PolyVec)). - move => /> [#] &1 r; - rewrite !SmtMap.get_set_sameE !oget_some;smt(Array32.tP setvE getvE Array32.initiE). - transitivity {1} { r <@ ArraySample.sL(); } - (true ==> ={r}) (true ==> rho{2} = Array32.init (fun i => r{1}.`1.[i]) /\ k{2} = Array32.init (fun i => r{1}.`2.[i])) => //; first - by inline*; auto => /> &1 ? [rl1 rl2] /= => *;split;rewrite tP => *;rewrite initiE /#. - - transitivity {2} { (rho, k) <@ ArraySample.sR(); } - (true ==> rho{2} = Array32.init (fun i => r{1}.`1.[i]) /\ k{2} = Array32.init (fun i => r{1}.`2.[i])) (true ==> ={rho,k})=> //. - call arrsample; 1: by auto => /> *;split;rewrite tP => *;rewrite initiE /#. - by inline*; auto => /> &1 ? [rl1 rl2] /= => *;split;rewrite tP => *;rewrite initiE /#. -qed. - -lemma PRFHop1C &m : - Pr [ Correctness_Adv(O,MLKEMSIdeal(DummyHS(IdealHSF.RF),S,KNS,KPRF,KPRF),Ac).main() @ &m : res] - - Pr [ Correctness_Adv(O,MLKEMSIdeal(DummyHS(IdealHSF.RF),S,KNS,DummyPRF1(IdealPRF1.RF),KPRF),Ac).main() @ &m : res] = - Pr [ PRF_DEFS.IND(PRF_.PRF,DC_PRF1(S,O,Ac)).main() @ &m : res ] - - Pr [ PRF_DEFS.IND(IdealPRF1.RF,DC_PRF1(S,O,Ac)).main() @ &m : res ]. -proof. -have -> : Pr[Correctness_Adv(O,MLKEMSIdeal(DummyHS(IdealHSF.RF), S, KNS, KPRF, KPRF), Ac).main() @ &m : res] = - Pr[PRF_DEFS.IND(PRF_.PRF, DC_PRF1(S, O, Ac)).main() @ &m : res]. -+ byequiv => //. - proc. - inline {2} 2. inline {2} 2. - wp; call(_: ={glob O, glob S}); 1: by sim. - wp; call(_: ={glob O, glob S}); 1: by sim. - wp; call(_: ={glob O}); 1: by sim. - conseq (_: _ ==> ={pk, sk,glob O,glob S}); 1: smt(). - swap {2} 2 -1. - seq 1 1 : (={glob Ac, glob S, glob O}); 1: by sim. - inline {1} 1. inline {2} 2. - swap {2} [2..3] -1. - seq 2 2 : (#pre /\ ={glob IdealHSF.RF, rho}). - + by inline *; sim. - seq 1 2 : (#pre /\ sig{1} = PRF_.PRF.k{2});1: by inline *; auto => />. - inline {1} 1; inline {2} 1. - sim 8 8. - seq 7 7 : (#pre /\ ={a,rho0} /\ sig0{1} = PRF_.PRF.k{2}); 1: by sim. - conseq => />. - inline {1} 1; inline {2} 1. - inline *. - unroll for {1} 8. - unroll for {2} 8. - do 3!( wp; conseq => />; sim). - wp;auto => />;sim. - unroll for {1} 6. - unroll for {2} 6. - do 3!( wp; conseq => />; sim). - by wp;auto => />;sim. - -have -> : Pr[Correctness_Adv(O,MLKEMSIdeal(DummyHS(IdealHSF.RF), S, KNS, DummyPRF1(IdealPRF1.RF), KPRF), Ac).main() @ &m : res] = - Pr[PRF_DEFS.IND(IdealPRF1.RF, DC_PRF1(S, O, Ac)).main() @ &m : res]. -+ byequiv => //. - proc. - inline {2} 2. inline {2} 2. - wp; call(_: ={glob O, glob S}); 1: by sim. - wp; call(_: ={glob O, glob S}); 1: by sim. - wp; call(_: ={glob O}); 1: by sim. - conseq (_: _ ==> ={pk, sk, glob O,glob S}); 1: smt(). - swap {2} 2 -1. - seq 1 1 : (={glob Ac, glob S, glob O}); 1: by sim. - inline {1} 1. inline {2} 2. - swap {2} [2..3] -1. - seq 2 2 : (#pre /\ ={glob IdealHSF.RF, rho}). - + by inline *; sim. - seq 1 2 : (#pre /\ ={glob IdealPRF1.RF,sig});1: by inline *; auto => />. - by inline *;sim. -done. -qed. - -lemma PRFHop2C &m : - Pr [ Correctness_Adv(O,MLKEMSIdeal(DummyHS(IdealHSF.RF),S,KNS,DummyPRF1(IdealPRF1.RF),KPRF),Ac).main() @ &m : res] - - Pr [ Correctness_Adv(O,MLKEMSIdeal(DummyHS(IdealHSF.RF),S,KNS,DummyPRF1(IdealPRF1.RF),DummyPRF2(IdealPRF2.RF)),Ac).main() @ &m : res] = - Pr [ PRF_DEFS.IND(PRF_.PRF,DC_PRF2(S,O,Ac)).main() @ &m : res ] - - Pr [ PRF_DEFS.IND(IdealPRF2.RF,DC_PRF2(S,O,Ac)).main() @ &m : res ]. -proof. -have -> : Pr[Correctness_Adv(O,MLKEMSIdeal(DummyHS(IdealHSF.RF), S, KNS, DummyPRF1(IdealPRF1.RF), KPRF), Ac).main() @ &m : res] = - Pr[PRF_DEFS.IND(PRF_.PRF, DC_PRF2(S, O, Ac)).main() @ &m : res]. -+ byequiv => //. - proc. - inline {2} 2. inline {2} 2. - wp; call(_: ={glob O, glob S}); 1: by sim. - swap {2} 1 3. - seq 3 3 : (={glob S, glob Ac, glob O,pk,sk,m}). - + wp; call(_: ={glob O}); 1: by sim. - wp; call(_: ={glob O, glob S}); 1: by sim. - by inline *; auto => />; sim. - conseq (_: _ ==> ={c, glob O,glob S}); 1: smt(). - inline {1} 1. inline {2} 2. - swap {2} [2..3] -1; sp 2 2. - seq 1 2 : (#pre /\ r{1} = PRF_.PRF.k{2});1: by inline *; auto => />. - inline {1} 1; inline {2} 1. - sim 13 13. - seq 12 12 : (={that, m1, aT, glob S, glob O} /\ r0{1} = PRF_.PRF.k{2}); 1: by sim. - conseq => />. - inline {1} 1; inline {2} 1. - inline *. - wp;auto => />;sim. - unroll for {1} 8. - unroll for {2} 8. - do 3!( wp; conseq => />; sim). - wp;auto => />;sim. - unroll for {1} 6. - unroll for {2} 6. - do 3!( wp; conseq => />; sim). - by wp;auto => />;sim. - -have -> : Pr[Correctness_Adv(O,MLKEMSIdeal(DummyHS(IdealHSF.RF), S, KNS, DummyPRF1(IdealPRF1.RF), DummyPRF2(RF)), Ac).main - () @ &m : res] = - Pr[PRF_DEFS.IND(RF, DC_PRF2(S, O, Ac)).main() @ &m : res]. -+ byequiv => //. - proc. - inline {2} 2. inline {2} 2. - wp; call(_: ={glob O, glob S}); 1: by sim. - swap {2} 1 3. - seq 3 3 : (={glob S, glob Ac, glob O,pk,sk,m}). - + wp; call(_: ={glob O}); 1: by sim. - wp; call(_: ={glob O, glob S}); 1: by sim. - by inline *; auto => />; sim. - conseq (_: _ ==> ={c, glob O,glob S}); 1: smt(). - inline {1} 1. inline {2} 2. - swap {2} [2..3] -1; sp 2 2. - seq 1 2 : (#pre /\ ={glob IdealPRF2.RF,r});1: by inline *; auto => />. - by inline *;sim => /> /#. -done. -qed. - - -end section. - - -section. - - - -declare module A <: CORR_ADV {-HSF.PRF, -PRF_.PRF, -B1ROM, -B2ROM, -KPRF, -RF,-IdealHSF.RF, -IdealPRF1.RF, -LRO,-RO_H.LRO,-RO_SMP.RO, -RO_SMP.LRO, -CB}. -declare module XOF <: XOF_RO_t {-HSF.PRF, -PRF_.PRF, -B1ROM, -B2ROM, -KPRF, -RF,-IdealHSF.RF, -IdealPRF1.RF, -A,-LRO,-RO_H.LRO,-RO_SMP.RO, -RO_SMP.LRO, -CB}. -declare module Sim <: Simulator_t {-HSF.PRF, -PRF_.PRF, -B1ROM, -B2ROM, -KPRF, -RF,-IdealHSF.RF, -IdealPRF1.RF, -XOF, -A,-LRO,-RO_H.LRO,-RO_SMP.RO, -RO_SMP.LRO, -CB}. - - -lemma correctness_spec &m cu_bound failprob1 failprob2 epsilon : - (glob Bcb2){m} = cu_bound => - - (forall (O <: Ideal_RO), islossless O.get => islossless Sim(O).init) => - (forall (O <: Ideal_RO), islossless O.get => islossless Sim(O).get) => - (forall (O <: SMP_RO), islossless O.get => islossless A(O).find) => - - (forall (trb : bool * bool) - (D0 <: Distinguisher_t{ -RO_H.RO, -RO_H.LRO, -RO_SMP.RO, -RO_SMP.LRO, -Sim, -KSampler(XOF), -NTTSampler(KSampler(XOF))}), - `|Pr[WIndfReal(D0, NTTSampler(KSampler(XOF)), RO_SMP.LRO).main(trb) @ &m : - res] - - Pr[WIndfIdeal(D0, Sim, RO_H.LRO).main(trb) @ &m : res]| <= - epsilon) => - - Pr[ CB1.main(cu_bound, cv_bound_max) @ &m : res] <= failprob1 => - Pr[ CB2.main(cu_bound) @ &m : res] <= failprob2 => - - Pr[ MLKEMPKE.Correctness_Adv(LRO,MLKEMS(KHS, KSampler(XOF), KNS,KPRF,KPRF),A).main() @ &m : res] <= - `|Pr[MLWE(Bcb2).main(false) @ &m : res] - - Pr[MLWE(Bcb2).main(true) @ &m : res]| + failprob1 + failprob2 + epsilon + - (`|Pr [ HS_DEFS.IND(HSF.PRF,DC_ES(KSampler(XOF),LRO,A)).main() @ &m : res ] - - Pr [ HS_DEFS.IND(IdealHSF.RF,DC_ES(KSampler(XOF),LRO,A)).main() @ &m : res ]|) + - (`|Pr [ PRF_DEFS.IND(PRF_.PRF,DC_PRF1(KSampler(XOF),LRO,A)).main() @ &m : res ] - - Pr [ PRF_DEFS.IND(IdealPRF1.RF,DC_PRF1(KSampler(XOF),LRO,A)).main() @ &m : res ]|) + - (`|Pr [ PRF_DEFS.IND(PRF_.PRF,DC_PRF2(KSampler(XOF),LRO,A)).main() @ &m : res ] - - Pr [ PRF_DEFS.IND(IdealPRF2.RF,DC_PRF2(KSampler(XOF),LRO,A)).main() @ &m : res ]|). -move => meminit Sim_i_ll Sim_h_ll A_ll ind fp1 fp2. -(* - We lost connection to the spec in the rom -have <-: Pr[Correctness_Adv(LRO,MLKEMS(KHS, KSampler(XOF), KNS,KPRF,KPRF), A).main() @ &m : res] = - Pr[Correctness_Adv(LRO,MLKEM(KHS, XOF, KPRF), A).main() @ &m : res]. -+ byequiv => //. - proc. - wp; call(_: ={glob LRO, glob XOF}); 1: by sim. - call (enc_sampler_enc LRO XOF). - wp; call(_: ={glob LRO}); 1: by sim. - call (kg_sampler_kg LRO XOF). - by inline *; auto. -*) - -move : (correctness_any_sampler A (KSampler(XOF)) Sim &m cu_bound failprob1 failprob2 epsilon meminit Sim_i_ll Sim_h_ll A_ll ind fp1 fp2). - -have <- := (ESHopC LRO (KSampler(XOF)) A). -have <- := (PRFHop1C LRO (KSampler(XOF)) A). -have <- := (PRFHop2C LRO (KSampler(XOF)) A). -have <- := (MLKEMS_MLKEMIdeal_C LRO (KSampler(XOF)) A). -have <-: Pr[Correctness_Adv(LRO,MLKEMSI(KSampler(XOF)), A).main() @ &m : res] = - Pr[Correctness_Adv(LRO,MLKEMSIdeal(DummyHS(IdealHSF.RF), KSampler(XOF), KNS, DummyPRF1(IdealPRF1.RF), DummyPRF2(RF)), A).main - () @ &m : res] by byequiv => //=; proc; sim. - - -by smt(). -qed. - -end section. - -