From a9a1452b3246a96f7c2ee6da36e5a8a9f6424902 Mon Sep 17 00:00:00 2001 From: Tiago Oliveira Date: Fri, 26 Jul 2024 17:36:28 +0100 Subject: [PATCH] test: benchmarking pass --- code/jasmin/mlkem_avx2/Makefile | 11 +- code/jasmin/mlkem_avx2/cycles.jinc | 23 ++ code/jasmin/mlkem_avx2/jspeed.jazz | 315 ++++++++++++++------- code/jasmin/mlkem_avx2/speed.h | 33 ++- code/jasmin/mlkem_avx2/test/speed_indcpa.c | 105 ------- code/jasmin/mlkem_avx2/test/speed_mlkem.c | 135 ++++----- code/jasmin/mlkem_ref/Makefile | 13 +- code/jasmin/mlkem_ref/cycles.jinc | 23 ++ code/jasmin/mlkem_ref/jspeed.jazz | 297 +++++++++++++++++++ code/jasmin/mlkem_ref/speed.h | 61 ++++ code/jasmin/mlkem_ref/test/speed_mlkem.c | 226 +++++++++++++++ 11 files changed, 922 insertions(+), 320 deletions(-) create mode 100644 code/jasmin/mlkem_avx2/cycles.jinc delete mode 100644 code/jasmin/mlkem_avx2/test/speed_indcpa.c create mode 100644 code/jasmin/mlkem_ref/cycles.jinc create mode 100644 code/jasmin/mlkem_ref/jspeed.jazz create mode 100644 code/jasmin/mlkem_ref/speed.h create mode 100644 code/jasmin/mlkem_ref/test/speed_mlkem.c diff --git a/code/jasmin/mlkem_avx2/Makefile b/code/jasmin/mlkem_avx2/Makefile index b1cd5c6c..533bda5f 100644 --- a/code/jasmin/mlkem_avx2/Makefile +++ b/code/jasmin/mlkem_avx2/Makefile @@ -49,7 +49,7 @@ TESTS := \ test: $(TESTS) -speed: test/speed_indcpa test/speed_mlkem +speed: test/speed_mlkem # -- @@ -161,16 +161,12 @@ run-tests: compile-tests # -- -test/speed_indcpa: test/speed_indcpa.c $(HEADERS) $(C_SOURCES) $(S_INC) jspeed.s +test/speed_mlkem: test/speed_mlkem.c $(HEADERS) $(C_SOURCES) $(S_INC) $(RANDOMBYTES) jspeed.s $(CC) $(CFLAGS) -o $@ $(C_SOURCES) $(RANDOMBYTES) jspeed.s $< -test/speed_mlkem: test/speed_mlkem.c $(HEADERS) $(C_SOURCES) $(S_INC) jspeed.s - $(CC) $(CFLAGS) -o $@ $(C_SOURCES) $(RANDOMBYTES) jspeed.s $< - -compile-speed: test/speed_indcpa test/speed_mlkem +compile-speed: test/speed_mlkem run-speed: compile-speed - ./test/speed_indcpa ./test/speed_mlkem # -- @@ -191,7 +187,6 @@ clean: -rm -f test/test_fips202 -rm -f test/test_indcpa -rm -f test/test_kem - -rm -f test/speed_indcpa -rm -f test/speed_mlkem ifeq ($(OS),Darwin) -rm -r -f test/*.dSYM diff --git a/code/jasmin/mlkem_avx2/cycles.jinc b/code/jasmin/mlkem_avx2/cycles.jinc new file mode 100644 index 00000000..bcdf5f10 --- /dev/null +++ b/code/jasmin/mlkem_avx2/cycles.jinc @@ -0,0 +1,23 @@ +inline fn tsc() -> stack u32[2] +{ + reg u64 l h; + stack u32[2] t; + + h, l = #RDTSC(); + + t[0] = (32u) l; + t[1] = (32u) h; + + return t; +} + +inline fn cycles(stack u32[2] start end) -> reg u64 +{ + reg u64 t; + + t = end[u64 0]; + t -= start[u64 0]; + + return t; +} + diff --git a/code/jasmin/mlkem_avx2/jspeed.jazz b/code/jasmin/mlkem_avx2/jspeed.jazz index 62907ed1..8f787640 100644 --- a/code/jasmin/mlkem_avx2/jspeed.jazz +++ b/code/jasmin/mlkem_avx2/jspeed.jazz @@ -5,193 +5,296 @@ require "indcpa.jinc" require "kem.jinc" require "verify.jinc" -/* Exported functions only for benchmarking */ -export fn gen_matrix_jazz(reg u64 ap, reg u64 seedp) +require "cycles.jinc" + +// note: this code needs to be reviewed and properly tested + +// exported functions only for benchmarking + +export fn gen_matrix_jazz(reg u64 ap seedp) -> reg u64 { - stack u16[MLKEM_K*MLKEM_VECN] aa; + stack u32[2] start end; + reg u64 t; + stack u64 aps; + stack u16[MLKEM_K*MLKEM_VECN] a; stack u8[MLKEM_SYMBYTES] seed; + reg u64 i; - aa = __gen_matrix(seed, 1); -} + aps = ap; + i=0; while(i < MLKEM_SYMBYTES){ seed[i] = (u8)[seedp + i]; i += 1; } -export fn poly_compress_jazz(reg u64 rp, reg u64 ap) -{ - stack u16[MLKEM_N] a; + start = tsc(); - a = _poly_compress(rp, a); -} + a = __gen_matrix(seed, 1); -export fn poly_decompress_jazz(reg u64 rp, reg u64 ap) -{ - stack u16[MLKEM_N] r; + end = tsc(); - r = _poly_decompress(r, ap); + ap = aps; + i=0; while(i < MLKEM_K*MLKEM_VECN){ (u16)[ap + 2*i] = a[i]; i += 1; } + + t = cycles(start, end); + return t; } -export fn poly_tomsg_jazz(reg u64 rp, reg u64 ap) +// ////////////////////////////////////////////////////////////////// + +export fn poly_getnoise_4x_jazz(reg u64 r0p r1p r2p r3p, reg u64 seedp, reg u8 nonce) -> reg u64 { - stack u16[MLKEM_N] a; + stack u32[2] start end; + reg u64 t; + stack u16[MLKEM_N] r0 r1 r2 r3; + stack u8[MLKEM_SYMBYTES] seed; + reg u64 i; - a = _poly_tomsg(rp, a); -} + () = #spill(r0p, r1p, r2p, r3p); + i=0; while(i < MLKEM_SYMBYTES){ seed[i] = (u8)[seedp + i]; i += 1; } -export fn poly_frommsg_jazz(reg u64 rp, reg u64 ap) -{ - stack u16[MLKEM_N] r; + start = tsc(); + + r0, r1, r2, r3 = _poly_getnoise_eta1_4x(r0, r1, r2, r3, seed, nonce); - r = _poly_frommsg(r, ap); + end = tsc(); + + () = #unspill(r0p, r1p, r2p, r3p); + + i=0; while(i < MLKEM_N) + { (u16)[r0p + 2*i] = r0[i]; + (u16)[r1p + 2*i] = r1[i]; + (u16)[r2p + 2*i] = r2[i]; + (u16)[r3p + 2*i] = r3[i]; + i+= 1; + } + + t = cycles(start, end); + return t; } -export fn poly_ntt_jazz(reg u64 rp) + +export fn poly_ntt_jazz(reg u64 rp) -> reg u64 { + stack u32[2] start end; + reg u64 t; stack u16[MLKEM_N] r; + reg u64 i; + + () = #spill(rp); + i=0; while(i < MLKEM_N){ r[i] = (u16)[rp + 2*i]; i += 1; } + + start = tsc(); r = _poly_ntt(r); + + end = tsc(); + + () = #unspill(rp); + i=0; while(i < MLKEM_N){ (u16)[rp + 2*i] = r[i]; i += 1; } + + t = cycles(start, end); + return t; } -export fn poly_invntt_jazz(reg u64 rp) + +export fn poly_invntt_jazz(reg u64 rp) -> reg u64 { + stack u32[2] start end; + reg u64 t; stack u16[MLKEM_N] r; + reg u64 i; + + () = #spill(rp); + i=0; while(i < MLKEM_N){ r[i] = (u16)[rp + 2*i]; i += 1; } + + start = tsc(); r = _poly_invntt(r); + + end = tsc(); + + () = #unspill(rp); + i=0; while(i < MLKEM_N){ (u16)[rp + 2*i] = r[i]; i += 1; } + + t = cycles(start, end); + return t; } -export fn poly_getnoise_jazz(reg u64 rp, reg u64 seedp, reg u8 nonce) +export fn poly_tomsg_jazz(reg u64 rp ap) -> reg u64 { - stack u16[MLKEM_N] r; - stack u8[MLKEM_SYMBYTES] seed; + stack u32[2] start end; + reg u64 t; + stack u16[MLKEM_N] a; + reg u64 i; + + () = #spill(ap); + i=0; while(i < MLKEM_N){ a[i] = (u16)[ap + 2*i]; i += 1; } + + start = tsc(); - //r = _poly_getnoise_eta1_4x(r, seed, nonce); + a = _poly_tomsg(rp, a); + + end = tsc(); + + () = #unspill(ap); + i=0; while(i < MLKEM_N){ (u16)[ap + 2*i] = a[i]; i += 1; } + + t = cycles(start, end); + return t; } -export fn poly_getnoise_4x_jazz(reg u64 r0 r1 r2 r3, reg u64 seedp, reg u8 nonce) +export fn poly_frommsg_jazz(reg u64 rp ap) -> reg u64 { - stack u16[MLKEM_N] r0 r1 r2 r3; - stack u8[MLKEM_SYMBYTES] seed; + stack u32[2] start end; + reg u64 t; + stack u16[MLKEM_N] r; + reg u64 i; - r0, r1, r2, r3 = _poly_getnoise_eta1_4x(r0, r1, r2, r3, seed, nonce); -} + () = #spill(rp); + i=0; while(i < MLKEM_N){ r[i] = (u16)[rp + 2*i]; i += 1; } + start = tsc(); + r = _poly_frommsg(r, ap); -export fn polyvec_decompress_jazz(reg u64 rp, reg u64 ap) -{ - stack u16[MLKEM_VECN] r; + end = tsc(); - r = __polyvec_decompress(ap); + () = #unspill(rp); + i=0; while(i < MLKEM_N){ (u16)[rp + 2*i] = r[i]; i += 1; } + + t = cycles(start, end); + return t; } -export fn polyvec_compress_jazz(reg u64 rp, reg u64 ap) +export fn poly_compress_jazz(reg u64 rp ap) -> reg u64 { - stack u16[MLKEM_VECN] a; + stack u32[2] start end; + reg u64 t; + stack u16[MLKEM_N] a; + reg u64 i; - __polyvec_compress(rp, a); -} + () = #spill(ap); + i=0; while(i < MLKEM_N){ a[i] = (u16)[ap + 2*i]; i += 1; } + start = tsc(); -export fn polyvec_pointwise_acc_jazz(reg u64 rp, reg u64 ap, reg u64 bp) + a = _poly_compress(rp, a); + + end = tsc(); + + () = #unspill(ap); + i=0; while(i < MLKEM_N){ (u16)[ap + 2*i] = a[i]; i += 1; } + + t = cycles(start, end); + return t; +} + +export fn poly_decompress_jazz(reg u64 rp ap) -> reg u64 { - stack u16[MLKEM_VECN] a; - stack u16[MLKEM_VECN] b; + stack u32[2] start end; + reg u64 t; stack u16[MLKEM_N] r; + reg u64 i; - r = __polyvec_pointwise_acc(r, a, b); -} + () = #spill(rp); + i=0; while(i < MLKEM_N){ r[i] = (u16)[rp + 2*i]; i += 1; } + start = tsc(); + + r = _poly_decompress(r, ap); + + end = tsc(); + + () = #unspill(rp); + i=0; while(i < MLKEM_N){ (u16)[rp + 2*i] = r[i]; i += 1; } + + t = cycles(start, end); + return t; -export fn indcpa_keypair_jazz(reg u64 pkp, reg u64 skp, reg ptr u8[MLKEM_SYMBYTES] randomnessp) -{ - __indcpa_keypair(pkp, skp, randomnessp); } +// ////////////////////////////////////////////////////////////////// -export fn indcpa_enc_jazz(reg u64 ctp, reg u64 msgp, reg u64 pkp, reg u64 coinsp) +export fn polyvec_pointwise_acc_jazz(reg u64 rp ap bp) -> reg u64 { - stack u16[MLKEM_VECN] pkpv sp ep bp; - stack u16[MLKEM_K*MLKEM_VECN] aat; - stack u16[MLKEM_N] k epp v; - stack u8[MLKEM_SYMBYTES] publicseed; - stack u8[MLKEM_SYMBYTES] noiseseed; + stack u32[2] start end; + reg u64 t; + stack u16[MLKEM_VECN] a; + stack u16[MLKEM_VECN] b; + stack u16[MLKEM_N] r; reg u64 i; - reg u8 c nonce; - stack u64 sctp; - sctp = ctp; + () = #spill(rp); + i=0; while(i < MLKEM_VECN){ a[i] = (u16)[ap + 2*i]; i += 1; } + i=0; while(i < MLKEM_VECN){ b[i] = (u16)[bp + 2*i]; i += 1; } - i = 0; - while (i < MLKEM_SYMBYTES) - { - c = (u8)[coinsp+i]; - noiseseed[(int)i] = c; - i += 1; - } + start = tsc(); - pkpv = __polyvec_frombytes(pkp); + r = __polyvec_pointwise_acc(r, a, b); - i = 0; - pkp += MLKEM_POLYVECBYTES; - while (i < MLKEM_SYMBYTES) - { - c = (u8)[pkp]; - publicseed[(int)i] = c; - pkp += 1; - i += 1; - } + end = tsc(); - k = _poly_frommsg(k, msgp); + () = #unspill(rp); + i=0; while(i < MLKEM_N){ (u16)[rp + 2*i] = r[i]; i += 1; } - aat = __gen_matrix(publicseed, 1); + t = cycles(start, end); + return t; +} - nonce = 0; - sp[0:MLKEM_N], sp[MLKEM_N:MLKEM_N], sp[2*MLKEM_N:MLKEM_N], ep[0:MLKEM_N] = _poly_getnoise_eta1_4x(sp[0:MLKEM_N], sp[MLKEM_N:MLKEM_N], sp[2*MLKEM_N:MLKEM_N], ep[0:MLKEM_N], noiseseed, nonce); +export fn polyvec_compress_jazz(reg u64 rp ap) -> reg u64 +{ + stack u32[2] start end; + reg u64 t; + stack u16[MLKEM_VECN] a; + reg u64 i; - nonce = 4; - ep[MLKEM_N:MLKEM_N], ep[2*MLKEM_N:MLKEM_N], epp, bp[0:MLKEM_N] = _poly_getnoise_eta1_4x(ep[MLKEM_N:MLKEM_N], ep[2*MLKEM_N:MLKEM_N], epp, bp[0:MLKEM_N], noiseseed, nonce); + i=0; while(i < MLKEM_VECN){ a[i] = (u16)[ap + 2*i]; i += 1; } - sp = __polyvec_ntt(sp); - - bp[0:MLKEM_N] = __polyvec_pointwise_acc(bp[0:MLKEM_N], aat[0:MLKEM_VECN], sp); - bp[MLKEM_N:MLKEM_N]= __polyvec_pointwise_acc(bp[MLKEM_N:MLKEM_N], aat[MLKEM_VECN:MLKEM_VECN], sp); - bp[2*MLKEM_N:MLKEM_N] = __polyvec_pointwise_acc(bp[2*MLKEM_N:MLKEM_N], aat[2*MLKEM_VECN:MLKEM_VECN], sp); - - v = __polyvec_pointwise_acc(v, pkpv, sp); + start = tsc(); - bp = __polyvec_invntt(bp); - v = _poly_invntt(v); + __polyvec_compress(rp, a); - bp = __polyvec_add2(bp, ep); - v = _poly_add2(v, epp); - v = _poly_add2(v, k); - bp = __polyvec_reduce(bp); - v = __poly_reduce(v); + end = tsc(); - ctp = sctp; - __polyvec_compress(ctp, bp); - ctp += MLKEM_POLYVECCOMPRESSEDBYTES; - v = _poly_compress(ctp, v); + t = cycles(start, end); + return t; } - -export fn indcpa_dec_jazz(reg u64 msgp, reg u64 ctp, reg u64 skp) +export fn polyvec_decompress_jazz(reg u64 rp ap) -> reg u64 { - __indcpa_dec_0(msgp, ctp, skp); + stack u32[2] start end; + reg u64 t; + stack u16[MLKEM_VECN] r; + reg u64 i; + + () = #spill(rp); + start = tsc(); + + r = __polyvec_decompress(ap); + + end = tsc(); + + () = #unspill(rp); + i=0; while(i < MLKEM_N){ (u16)[rp + 2*i] = r[i]; i += 1; } + + t = cycles(start, end); + return t; } -export fn crypto_kem_keypair_jazz(reg u64 pkp, reg u64 skp, reg ptr u8[MLKEM_SYMBYTES*2] randomnessp) +// ////////////////////////////////////////////////////////////////// + +export fn crypto_kem_keypair_jazz(reg u64 pkp skp, reg ptr u8[MLKEM_SYMBYTES*2] randomnessp) { __crypto_kem_keypair_jazz(pkp, skp, randomnessp); } - -export fn crypto_kem_enc_jazz(reg u64 ctp, reg u64 shkp, reg u64 pkp, reg ptr u8[MLKEM_SYMBYTES] randomnessp) +export fn crypto_kem_enc_jazz(reg u64 ctp shkp pkp, reg ptr u8[MLKEM_SYMBYTES] randomnessp) { __crypto_kem_enc_jazz(ctp, shkp, pkp, randomnessp); } -export fn crypto_kem_dec_jazz(reg u64 shkp, reg u64 ctp, reg u64 skp) +export fn crypto_kem_dec_jazz(reg u64 shkp ctp skp) { __crypto_kem_dec_jazz(shkp, ctp, skp); } + diff --git a/code/jasmin/mlkem_avx2/speed.h b/code/jasmin/mlkem_avx2/speed.h index 070b30ac..e521f72d 100644 --- a/code/jasmin/mlkem_avx2/speed.h +++ b/code/jasmin/mlkem_avx2/speed.h @@ -12,28 +12,26 @@ typedef struct{ poly vec[MLKEM_K]; } polyvec; -void gen_matrix_jazz(polyvec *a, unsigned char *seed); +uint64_t gen_matrix_jazz(polyvec *a, unsigned char *seed); -/*Poly functions*/ -void poly_compress_jazz(unsigned char *r, poly *a); -void poly_decompress_jazz(poly *r, const unsigned char *a); +// Poly functions +uint64_t poly_getnoise_4x_jazz(poly *r0, poly *r1, poly *r2, poly *r3,const unsigned char *seed, unsigned char nonce); -void poly_frommsg_jazz(poly *r, const unsigned char msg[MLKEM_SYMBYTES]); -void poly_tomsg_jazz(unsigned char msg[MLKEM_SYMBYTES], poly *r); +uint64_t poly_ntt_jazz(poly *r); +uint64_t poly_invntt_jazz(poly *r); -void poly_getnoise_jazz(poly *r,const unsigned char *seed, unsigned char nonce); -void poly_getnoise_4x_jazz(poly *r0, poly *r1, poly *r2, poly *r3,const unsigned char *seed, unsigned char nonce); +uint64_t poly_frommsg_jazz(poly *r, const unsigned char msg[MLKEM_SYMBYTES]); +uint64_t poly_tomsg_jazz(unsigned char msg[MLKEM_SYMBYTES], poly *r); -void poly_ntt_jazz(poly *r); -void poly_invntt_jazz(poly *r); +uint64_t poly_compress_jazz(unsigned char *r, poly *a); +uint64_t poly_decompress_jazz(poly *r, const unsigned char *a); -/*Polyvec functions*/ -void polyvec_compress_jazz(unsigned char *r, polyvec *a); -void polyvec_decompress_jazz(polyvec *r, const unsigned char *a); +// Polyvec functions +uint64_t polyvec_pointwise_acc_jazz(poly *r, const polyvec *a, const polyvec *b); +uint64_t polyvec_compress_jazz(unsigned char *r, polyvec *a); +uint64_t polyvec_decompress_jazz(polyvec *r, const unsigned char *a); -void polyvec_pointwise_acc_jazz(poly *r, const polyvec *a, const polyvec *b); - -/* Indcpa functions*/ +// Indcpa functions void indcpa_keypair_jazz(unsigned char *pk, unsigned char *sk, const unsigned char *randomness); @@ -47,7 +45,7 @@ void indcpa_dec_jazz(unsigned char *m, const unsigned char *c, const unsigned char *sk); -/* KEM functions */ +// KEM functions void crypto_kem_keypair_jazz(unsigned char *pk, unsigned char *sk, const unsigned char *randomness); @@ -56,6 +54,7 @@ void crypto_kem_enc_jazz(unsigned char *c, const unsigned char *m, const unsigned char *pk, const unsigned char *coins); + void crypto_kem_dec_jazz(unsigned char *m, const unsigned char *c, const unsigned char *sk); diff --git a/code/jasmin/mlkem_avx2/test/speed_indcpa.c b/code/jasmin/mlkem_avx2/test/speed_indcpa.c deleted file mode 100644 index 151f0ce4..00000000 --- a/code/jasmin/mlkem_avx2/test/speed_indcpa.c +++ /dev/null @@ -1,105 +0,0 @@ -#include -#include -#include -#include -#include - -#include "../params.h" -#include "../ntt.h" -#include "../indcpa.h" - -#define NRUNS 10000 - -static inline uint64_t cpucycles(void) { - uint64_t result; - - asm volatile("rdtsc; shlq $32,%%rdx; orq %%rdx,%%rax" - : "=a" (result) : : "%rdx"); - - return result; -} - -static int cmp_uint64(const void *a, const void *b) { - if(*(uint64_t *)a < *(uint64_t *)b) return -1; - if(*(uint64_t *)a > *(uint64_t *)b) return 1; - return 0; -} - -static uint64_t median(uint64_t *l, size_t llen) { - qsort(l,llen,sizeof(uint64_t),cmp_uint64); - - if(llen%2) return l[llen/2]; - else return (l[llen/2-1]+l[llen/2])/2; -} - -static uint64_t average(uint64_t *t, size_t tlen) { - size_t i; - uint64_t acc=0; - - for(i=0;i stack u32[2] +{ + reg u64 l h; + stack u32[2] t; + + h, l = #RDTSC(); + + t[0] = (32u) l; + t[1] = (32u) h; + + return t; +} + +inline fn cycles(stack u32[2] start end) -> reg u64 +{ + reg u64 t; + + t = end[u64 0]; + t -= start[u64 0]; + + return t; +} + diff --git a/code/jasmin/mlkem_ref/jspeed.jazz b/code/jasmin/mlkem_ref/jspeed.jazz new file mode 100644 index 00000000..d03fa4b0 --- /dev/null +++ b/code/jasmin/mlkem_ref/jspeed.jazz @@ -0,0 +1,297 @@ +require "poly.jinc" +require "polyvec.jinc" +require "gen_matrix.jinc" +require "indcpa.jinc" +require "kem.jinc" +require "verify.jinc" + +require "cycles.jinc" + +// note: this code needs to be reviewed and properly tested + +// exported functions only for benchmarking + +export fn gen_matrix_jazz(reg u64 ap seedp) -> reg u64 +{ + stack u32[2] start end; + reg u64 t; + stack u64 aps; + stack u16[MLKEM_K*MLKEM_VECN] a; + stack u8[MLKEM_SYMBYTES] seed; + reg u64 i; + + aps = ap; + i=0; while(i < MLKEM_SYMBYTES){ seed[i] = (u8)[seedp + i]; i += 1; } + + start = tsc(); + + a = __gen_matrix(seed, 1); + + end = tsc(); + + ap = aps; + i=0; while(i < MLKEM_K*MLKEM_VECN){ (u16)[ap + 2*i] = a[i]; i += 1; } + + t = cycles(start, end); + return t; +} + +// ////////////////////////////////////////////////////////////////// + +export fn poly_getnoise_jazz(reg u64 rp, reg u64 seedp, reg u8 nonce) -> reg u64 +{ + stack u32[2] start end; + reg u64 t; + stack u16[MLKEM_N] r; + stack u8[MLKEM_SYMBYTES] seed; + reg u64 i; + + nonce = nonce; + () = #spill(rp); + i=0; while(i < MLKEM_SYMBYTES){ seed[i] = (u8)[seedp + i]; i += 1; } + + start = tsc(); + + r = _poly_getnoise(r, seed, nonce); + + end = tsc(); + + () = #unspill(rp); + i=0; while(i < MLKEM_N) + { (u16)[rp + 2*i] = r[i]; + i+= 1; + } + + t = cycles(start, end); + return t; +} + + +export fn poly_ntt_jazz(reg u64 rp) -> reg u64 +{ + stack u32[2] start end; + reg u64 t; + stack u16[MLKEM_N] r; + reg u64 i; + + () = #spill(rp); + i=0; while(i < MLKEM_N){ r[i] = (u16)[rp + 2*i]; i += 1; } + + start = tsc(); + + r = _poly_ntt(r); + + end = tsc(); + + () = #unspill(rp); + i=0; while(i < MLKEM_N){ (u16)[rp + 2*i] = r[i]; i += 1; } + + t = cycles(start, end); + return t; +} + + +export fn poly_invntt_jazz(reg u64 rp) -> reg u64 +{ + stack u32[2] start end; + reg u64 t; + stack u16[MLKEM_N] r; + reg u64 i; + + () = #spill(rp); + i=0; while(i < MLKEM_N){ r[i] = (u16)[rp + 2*i]; i += 1; } + + start = tsc(); + + r = _poly_invntt(r); + + end = tsc(); + + () = #unspill(rp); + i=0; while(i < MLKEM_N){ (u16)[rp + 2*i] = r[i]; i += 1; } + + t = cycles(start, end); + return t; +} + + +export fn poly_tomsg_jazz(reg u64 rp ap) -> reg u64 +{ + stack u32[2] start end; + reg u64 t; + stack u16[MLKEM_N] a; + reg u64 i; + + () = #spill(ap); + i=0; while(i < MLKEM_N){ a[i] = (u16)[ap + 2*i]; i += 1; } + + start = tsc(); + + a = _poly_tomsg(rp, a); + + end = tsc(); + + () = #unspill(ap); + i=0; while(i < MLKEM_N){ (u16)[ap + 2*i] = a[i]; i += 1; } + + t = cycles(start, end); + return t; +} + + +export fn poly_frommsg_jazz(reg u64 rp ap) -> reg u64 +{ + stack u32[2] start end; + reg u64 t; + stack u16[MLKEM_N] r; + reg u64 i; + + () = #spill(rp); + i=0; while(i < MLKEM_N){ r[i] = (u16)[rp + 2*i]; i += 1; } + + start = tsc(); + + r = _poly_frommsg(r, ap); + + end = tsc(); + + () = #unspill(rp); + i=0; while(i < MLKEM_N){ (u16)[rp + 2*i] = r[i]; i += 1; } + + t = cycles(start, end); + return t; +} + + +export fn poly_compress_jazz(reg u64 rp ap) -> reg u64 +{ + stack u32[2] start end; + reg u64 t; + stack u16[MLKEM_N] a; + reg u64 i; + + () = #spill(ap); + i=0; while(i < MLKEM_N){ a[i] = (u16)[ap + 2*i]; i += 1; } + + start = tsc(); + + a = _poly_compress(rp, a); + + end = tsc(); + + () = #unspill(ap); + i=0; while(i < MLKEM_N){ (u16)[ap + 2*i] = a[i]; i += 1; } + + t = cycles(start, end); + return t; +} + +export fn poly_decompress_jazz(reg u64 rp ap) -> reg u64 +{ + stack u32[2] start end; + reg u64 t; + stack u16[MLKEM_N] r; + reg u64 i; + + () = #spill(rp); + i=0; while(i < MLKEM_N){ r[i] = (u16)[rp + 2*i]; i += 1; } + + start = tsc(); + + r = _poly_decompress(r, ap); + + end = tsc(); + + () = #unspill(rp); + i=0; while(i < MLKEM_N){ (u16)[rp + 2*i] = r[i]; i += 1; } + + t = cycles(start, end); + return t; + +} + +// ////////////////////////////////////////////////////////////////// + +export fn polyvec_pointwise_acc_jazz(reg u64 rp ap bp) -> reg u64 +{ + stack u32[2] start end; + reg u64 t; + stack u16[MLKEM_VECN] a; + stack u16[MLKEM_VECN] b; + stack u16[MLKEM_N] r; + reg u64 i; + + () = #spill(rp); + i=0; while(i < MLKEM_VECN){ a[i] = (u16)[ap + 2*i]; i += 1; } + i=0; while(i < MLKEM_VECN){ b[i] = (u16)[bp + 2*i]; i += 1; } + + start = tsc(); + + r = __polyvec_pointwise_acc(a, b); + + end = tsc(); + + () = #unspill(rp); + i=0; while(i < MLKEM_N){ (u16)[rp + 2*i] = r[i]; i += 1; } + + t = cycles(start, end); + return t; +} + +export fn polyvec_compress_jazz(reg u64 rp ap) -> reg u64 +{ + stack u32[2] start end; + reg u64 t; + stack u16[MLKEM_VECN] a; + reg u64 i; + + i=0; while(i < MLKEM_VECN){ a[i] = (u16)[ap + 2*i]; i += 1; } + + start = tsc(); + + __polyvec_compress(rp, a); + + end = tsc(); + + t = cycles(start, end); + return t; +} + +export fn polyvec_decompress_jazz(reg u64 rp ap) -> reg u64 +{ + stack u32[2] start end; + reg u64 t; + stack u16[MLKEM_VECN] r; + reg u64 i; + + () = #spill(rp); + start = tsc(); + + r = __polyvec_decompress(ap); + + end = tsc(); + + () = #unspill(rp); + i=0; while(i < MLKEM_N){ (u16)[rp + 2*i] = r[i]; i += 1; } + + t = cycles(start, end); + return t; +} + +// ////////////////////////////////////////////////////////////////// + +export fn crypto_kem_keypair_jazz(reg u64 pkp skp, reg ptr u8[MLKEM_SYMBYTES*2] randomnessp) +{ + __crypto_kem_keypair_jazz(pkp, skp, randomnessp); +} + +export fn crypto_kem_enc_jazz(reg u64 ctp shkp pkp, reg ptr u8[MLKEM_SYMBYTES] randomnessp) +{ + __crypto_kem_enc_jazz(ctp, shkp, pkp, randomnessp); +} + +export fn crypto_kem_dec_jazz(reg u64 shkp ctp skp) +{ + __crypto_kem_dec_jazz(shkp, ctp, skp); +} + diff --git a/code/jasmin/mlkem_ref/speed.h b/code/jasmin/mlkem_ref/speed.h new file mode 100644 index 00000000..f03345d9 --- /dev/null +++ b/code/jasmin/mlkem_ref/speed.h @@ -0,0 +1,61 @@ +#ifndef SPEED_H +#define SPEED_H + +#include +#include "params.h" + +typedef struct{ + int16_t __attribute__((aligned(32))) coeffs[MLKEM_N]; +} poly; + +typedef struct{ + poly vec[MLKEM_K]; +} polyvec; + +uint64_t gen_matrix_jazz(polyvec *a, unsigned char *seed); + +// Poly functions +uint64_t poly_getnoise_jazz(poly *r,const unsigned char *seed, unsigned char nonce); + +uint64_t poly_ntt_jazz(poly *r); +uint64_t poly_invntt_jazz(poly *r); + +uint64_t poly_frommsg_jazz(poly *r, const unsigned char msg[MLKEM_SYMBYTES]); +uint64_t poly_tomsg_jazz(unsigned char msg[MLKEM_SYMBYTES], poly *r); + +uint64_t poly_compress_jazz(unsigned char *r, poly *a); +uint64_t poly_decompress_jazz(poly *r, const unsigned char *a); + +// Polyvec functions +uint64_t polyvec_pointwise_acc_jazz(poly *r, const polyvec *a, const polyvec *b); +uint64_t polyvec_compress_jazz(unsigned char *r, polyvec *a); +uint64_t polyvec_decompress_jazz(polyvec *r, const unsigned char *a); + +// Indcpa functions +void indcpa_keypair_jazz(unsigned char *pk, + unsigned char *sk, + const unsigned char *randomness); + +void indcpa_enc_jazz(unsigned char *c, + const unsigned char *m, + const unsigned char *pk, + const unsigned char *coins); + +void indcpa_dec_jazz(unsigned char *m, + const unsigned char *c, + const unsigned char *sk); + +// KEM functions +void crypto_kem_keypair_jazz(unsigned char *pk, + unsigned char *sk, + const unsigned char *randomness); + +void crypto_kem_enc_jazz(unsigned char *c, + const unsigned char *m, + const unsigned char *pk, + const unsigned char *coins); + +void crypto_kem_dec_jazz(unsigned char *m, + const unsigned char *c, + const unsigned char *sk); +#endif diff --git a/code/jasmin/mlkem_ref/test/speed_mlkem.c b/code/jasmin/mlkem_ref/test/speed_mlkem.c new file mode 100644 index 00000000..d70ff119 --- /dev/null +++ b/code/jasmin/mlkem_ref/test/speed_mlkem.c @@ -0,0 +1,226 @@ +#include +#include +#include +#include +#include + +#include "../params.h" +#include "../speed.h" + +#define NRUNS 10000 + +static inline uint64_t cpucycles(void) { + uint64_t result; + + asm volatile("rdtsc; shlq $32,%%rdx; orq %%rdx,%%rax" + : "=a" (result) : : "%rdx"); + + return result; +} + +static int cmp_uint64(const void *a, const void *b) { + if(*(uint64_t *)a < *(uint64_t *)b) return -1; + if(*(uint64_t *)a > *(uint64_t *)b) return 1; + return 0; +} + +static uint64_t median(uint64_t *l, size_t llen) { + qsort(l,llen,sizeof(uint64_t),cmp_uint64); + + if(llen%2) return l[llen/2]; + else return (l[llen/2-1]+l[llen/2])/2; +} + +static uint64_t average(uint64_t *t, size_t tlen) { + size_t i; + uint64_t acc=0; + + for(i=0;i