From c8f796ab72aec0200104f5d6332a2a4bd6d5b570 Mon Sep 17 00:00:00 2001 From: Tiago Oliveira Date: Thu, 12 Oct 2023 15:42:59 +0100 Subject: [PATCH] curve25519: mulx implementation from 6.6K .s lines to 2.9K .s lines; isolating loads from internal code; --- .../curve25519/amd64/common/51/decode_u5.jinc | 53 ++++++++ .../amd64/common/51/init_points5.jinc | 56 +++++++++ .../curve25519/amd64/common/51/load5.jinc | 113 ------------------ .../curve25519/amd64/common/64/decode_u4.jinc | 18 +++ .../64/{load4.jinc => init_points4.jinc} | 24 ---- .../curve25519/amd64/common/decode.jinc | 34 ------ .../amd64/common/decode_scalar.jinc | 28 +++++ .../curve25519/amd64/common/load_store4.jinc | 19 +++ .../curve25519/amd64/mulx/curve25519.jinc | 32 ++--- .../curve25519/amd64/mulx/invert4.jinc | 53 ++++---- .../curve25519/amd64/mulx/mul4.jinc | 48 ++++++++ .../curve25519/amd64/mulx/scalarmult.jazz | 30 ++++- .../curve25519/amd64/mulx/sqr4.jinc | 35 +++++- .../curve25519/amd64/ref4/curve25519.jinc | 32 ++--- .../curve25519/amd64/ref4/scalarmult.jazz | 30 ++++- .../curve25519/amd64/ref5/curve25519.jinc | 30 ++--- .../curve25519/amd64/ref5/scalarmult.jazz | 31 ++++- 17 files changed, 391 insertions(+), 275 deletions(-) create mode 100644 src/crypto_scalarmult/curve25519/amd64/common/51/decode_u5.jinc create mode 100644 src/crypto_scalarmult/curve25519/amd64/common/51/init_points5.jinc delete mode 100644 src/crypto_scalarmult/curve25519/amd64/common/51/load5.jinc create mode 100644 src/crypto_scalarmult/curve25519/amd64/common/64/decode_u4.jinc rename src/crypto_scalarmult/curve25519/amd64/common/64/{load4.jinc => init_points4.jinc} (69%) delete mode 100644 src/crypto_scalarmult/curve25519/amd64/common/decode.jinc create mode 100644 src/crypto_scalarmult/curve25519/amd64/common/decode_scalar.jinc create mode 100644 src/crypto_scalarmult/curve25519/amd64/common/load_store4.jinc diff --git a/src/crypto_scalarmult/curve25519/amd64/common/51/decode_u5.jinc b/src/crypto_scalarmult/curve25519/amd64/common/51/decode_u5.jinc new file mode 100644 index 00000000..bc467d49 --- /dev/null +++ b/src/crypto_scalarmult/curve25519/amd64/common/51/decode_u5.jinc @@ -0,0 +1,53 @@ +inline fn __decode_u_coordinate5(reg u64[4] t) -> reg u64[5] +{ + reg u64[5] u; + reg u64 mask; + + mask = 0x7ffffffffffff; + + //u[0] = t[0] & mask; // 51; 13 left + u[0] = t[0]; + u[0] &= mask; + + //u[1] = (t[1] << 13) || (t[0] >> 51) & mask; // 38; 26 left + u[1] = t[1]; + u[1] <<= 13; + t[0] >>= 51; + u[1] |= t[0]; + u[1] &= mask; + + //u[2] = (t[2] << 26) || (t[1] >> 38) & mask; // 25; 39 left + u[2] = t[2]; + u[2] <<= 26; + t[1] >>= 38; + u[2] |= t[1]; + u[2] &= mask; + + //u[3] = (t[3] << 39) || (t[2] >> 25) & mask; // 12; '52' left + u[3] = t[3]; + u[3] <<= 39; + t[2] >>= 25; + u[3] |= t[2]; + u[3] &= mask; + + //u[4] = (t[3] >> 12) & mask; + u[4] = t[3]; + u[4] >>= 12; + u[4] &= mask; + + return u; +} + +inline fn __decode_u_coordinate_base5() -> reg u64[5] +{ + reg u64[5] u; + + u[0] = 9; + u[1] = 0; + u[2] = 0; + u[3] = 0; + u[4] = 0; + + return u; +} + diff --git a/src/crypto_scalarmult/curve25519/amd64/common/51/init_points5.jinc b/src/crypto_scalarmult/curve25519/amd64/common/51/init_points5.jinc new file mode 100644 index 00000000..fe705c48 --- /dev/null +++ b/src/crypto_scalarmult/curve25519/amd64/common/51/init_points5.jinc @@ -0,0 +1,56 @@ +inline fn __init_points5( + reg u64[5] initr) + -> + stack u64[5], + reg u64[5], + stack u64[5], + stack u64[5] +{ + inline int i; + stack u64[5] x2 x3 z3; + reg u64[5] z2r; + reg u64 z; + + ?{}, z = #set0(); + + x2[0] = 1; + z2r[0] = 0; + x3 = #copy(initr); + z3[0] = 1; + + for i=1 to 5 + { x2[i] = z; + z2r[i] = z; + z3[i] = z; + } + + // (1, 0, init, 1) + return x2, z2r, x3, z3; +} + +inline fn __init_points5_x3() + -> + stack u64[5], + reg u64[5], + stack u64[5] +{ + inline int i; + stack u64[5] f1s f3s; + reg u64[5] f2; + reg u64 z; + + ?{}, z = #set0(); + + f1s[0] = 1; + f2[0] = 1; + f3s[0] = 1; + + for i=1 to 5 + { f1s[i] = z; + f2[i] = z; + f3s[i] = z; + } + + return f1s, f2, f3s; +} + diff --git a/src/crypto_scalarmult/curve25519/amd64/common/51/load5.jinc b/src/crypto_scalarmult/curve25519/amd64/common/51/load5.jinc deleted file mode 100644 index 9511e3c7..00000000 --- a/src/crypto_scalarmult/curve25519/amd64/common/51/load5.jinc +++ /dev/null @@ -1,113 +0,0 @@ -inline fn __decode_u_coordinate5(reg u64 up) -> reg u64[5] -{ - inline int i; - reg u64[4] t; - reg u64[5] u; - reg u64 mask; - - for i=0 to 4 - { t[i] = [up + 8*i]; } - mask = 0x7ffffffffffff; - - //u[0] = t[0] & mask; // 51; 13 left - u[0] = t[0]; - u[0] &= mask; - - //u[1] = (t[1] << 13) || (t[0] >> 51) & mask; // 38; 26 left - u[1] = t[1]; - u[1] <<= 13; - t[0] >>= 51; - u[1] |= t[0]; - u[1] &= mask; - - //u[2] = (t[2] << 26) || (t[1] >> 38) & mask; // 25; 39 left - u[2] = t[2]; - u[2] <<= 26; - t[1] >>= 38; - u[2] |= t[1]; - u[2] &= mask; - - //u[3] = (t[3] << 39) || (t[2] >> 25) & mask; // 12; '52' left - u[3] = t[3]; - u[3] <<= 39; - t[2] >>= 25; - u[3] |= t[2]; - u[3] &= mask; - - //u[4] = (t[3] >> 12) & mask; - u[4] = t[3]; - u[4] >>= 12; - u[4] &= mask; - - return u; -} - -inline fn __decode_u_coordinate_base5() -> reg u64[5] -{ - reg u64[5] u; - - u[0] = 9; - u[1] = 0; - u[2] = 0; - u[3] = 0; - u[4] = 0; - - return u; -} - -inline fn __init_points5( - reg u64[5] initr) - -> - stack u64[5], - reg u64[5], - stack u64[5], - stack u64[5] -{ - inline int i; - stack u64[5] x2 x3 z3; - reg u64[5] z2r; - reg u64 z; - - ?{}, z = #set0(); - - x2[0] = 1; - z2r[0] = 0; - x3 = #copy(initr); - z3[0] = 1; - - for i=1 to 5 - { x2[i] = z; - z2r[i] = z; - z3[i] = z; - } - - // (1, 0, init, 1) - return x2, z2r, x3, z3; -} - -inline fn __init_points5_x3() - -> - stack u64[5], - reg u64[5], - stack u64[5] -{ - inline int i; - stack u64[5] f1s f3s; - reg u64[5] f2; - reg u64 z; - - ?{}, z = #set0(); - - f1s[0] = 1; - f2[0] = 1; - f3s[0] = 1; - - for i=1 to 5 - { f1s[i] = z; - f2[i] = z; - f3s[i] = z; - } - - return f1s, f2, f3s; -} - diff --git a/src/crypto_scalarmult/curve25519/amd64/common/64/decode_u4.jinc b/src/crypto_scalarmult/curve25519/amd64/common/64/decode_u4.jinc new file mode 100644 index 00000000..04b1029c --- /dev/null +++ b/src/crypto_scalarmult/curve25519/amd64/common/64/decode_u4.jinc @@ -0,0 +1,18 @@ +inline fn __decode_u_coordinate4(reg u64[4] u) -> reg u64[4] +{ + u[3] &= 0x7fffffffffffffff; + return u; +} + +inline fn __decode_u_coordinate_base4() -> reg u64[4] +{ + reg u64[4] u; + + u[0] = 9; + u[1] = 0; + u[2] = 0; + u[3] = 0; + + return u; +} + diff --git a/src/crypto_scalarmult/curve25519/amd64/common/64/load4.jinc b/src/crypto_scalarmult/curve25519/amd64/common/64/init_points4.jinc similarity index 69% rename from src/crypto_scalarmult/curve25519/amd64/common/64/load4.jinc rename to src/crypto_scalarmult/curve25519/amd64/common/64/init_points4.jinc index 5f01ce07..e6d72583 100644 --- a/src/crypto_scalarmult/curve25519/amd64/common/64/load4.jinc +++ b/src/crypto_scalarmult/curve25519/amd64/common/64/init_points4.jinc @@ -1,27 +1,3 @@ -inline fn __decode_u_coordinate4(reg u64 up) -> reg u64[4] -{ - inline int i; - reg u64[4] u; - - for i=0 to 4 - { u[i] = [up + 8*i]; } - u[3] &= 0x7fffffffffffffff; - - return u; -} - -inline fn __decode_u_coordinate_base4() -> reg u64[4] -{ - reg u64[4] u; - - u[0] = 9; - u[1] = 0; - u[2] = 0; - u[3] = 0; - - return u; -} - inline fn __init_points4( reg u64[4] initr) -> diff --git a/src/crypto_scalarmult/curve25519/amd64/common/decode.jinc b/src/crypto_scalarmult/curve25519/amd64/common/decode.jinc deleted file mode 100644 index 19fbf18e..00000000 --- a/src/crypto_scalarmult/curve25519/amd64/common/decode.jinc +++ /dev/null @@ -1,34 +0,0 @@ -inline fn __decode_scalar(reg u64 kp) -> stack u8[32] -{ - inline int i; - stack u8[32] k; - reg u64 t; - - for i=0 to 4 - { t = [kp + 8*i]; - k[u64 i] = t; } - - k[0] &= 0xf8; - k[31] &= 0x7f; - k[31] |= 0x40; - - return k; -} - -inline fn __decode_scalar_shl1(reg u64 kp) -> stack u64[4] -{ - inline int i; - reg u64[4] k; - stack u64[4] ks; - - for i=0 to 4 - { k[i] = [kp + 8*i]; } - k[3] <<= 1; - k[0] &= 0xfffffffffffffff8; - k[3] |= 0x8000000000000000; - - ks = #copy(k); - - return ks; -} - diff --git a/src/crypto_scalarmult/curve25519/amd64/common/decode_scalar.jinc b/src/crypto_scalarmult/curve25519/amd64/common/decode_scalar.jinc new file mode 100644 index 00000000..ce2e08bb --- /dev/null +++ b/src/crypto_scalarmult/curve25519/amd64/common/decode_scalar.jinc @@ -0,0 +1,28 @@ +inline fn __decode_scalar(reg u64[4] k) -> stack u8[32] +{ + inline int i; + stack u8[32] ks; + + for i=0 to 4 + { ks[u64 i] = k[i]; } + + ks[0] &= 0xf8; + ks[31] &= 0x7f; + ks[31] |= 0x40; + + return ks; +} + +inline fn __decode_scalar_shl1(reg u64[4] k) -> stack u64[4] +{ + stack u64[4] ks; + + k[3] <<= 1; + k[0] &= 0xfffffffffffffff8; + k[3] |= 0x8000000000000000; + + ks = #copy(k); + + return ks; +} + diff --git a/src/crypto_scalarmult/curve25519/amd64/common/load_store4.jinc b/src/crypto_scalarmult/curve25519/amd64/common/load_store4.jinc new file mode 100644 index 00000000..d417cfba --- /dev/null +++ b/src/crypto_scalarmult/curve25519/amd64/common/load_store4.jinc @@ -0,0 +1,19 @@ +inline fn __load4(reg u64 p) -> reg u64[4] +{ + inline int i; + reg u64[4] a; + + for i=0 to 4 + { a[i] = [p + 8*i]; } + + return a; +} + +inline fn __store4(reg u64 p, reg u64[4] a) +{ + inline int i; + + for i=0 to 4 + { [p + 8*i] = a[i]; } +} + diff --git a/src/crypto_scalarmult/curve25519/amd64/mulx/curve25519.jinc b/src/crypto_scalarmult/curve25519/amd64/mulx/curve25519.jinc index d6eae363..280c44f6 100644 --- a/src/crypto_scalarmult/curve25519/amd64/mulx/curve25519.jinc +++ b/src/crypto_scalarmult/curve25519/amd64/mulx/curve25519.jinc @@ -1,10 +1,11 @@ from Jade require "crypto_scalarmult/curve25519/amd64/common/bit.jinc" -from Jade require "crypto_scalarmult/curve25519/amd64/common/decode.jinc" +from Jade require "crypto_scalarmult/curve25519/amd64/common/decode_scalar.jinc" +from Jade require "crypto_scalarmult/curve25519/amd64/common/64/decode_u4.jinc" +from Jade require "crypto_scalarmult/curve25519/amd64/common/64/init_points4.jinc" from Jade require "crypto_scalarmult/curve25519/amd64/common/64/add4.jinc" from Jade require "crypto_scalarmult/curve25519/amd64/common/64/sub4.jinc" from Jade require "crypto_scalarmult/curve25519/amd64/common/64/cswap4.jinc" -from Jade require "crypto_scalarmult/curve25519/amd64/common/64/load4.jinc" from Jade require "crypto_scalarmult/curve25519/amd64/common/64/tobytes4.jinc" require "mul4.jinc" @@ -141,38 +142,27 @@ inline fn __curve25519_internal_mulx(stack u8[32] k, reg u64[4] u) -> reg u64[4] return r; } -inline fn __curve25519_mulx(reg u64 rp kp up) +inline fn __curve25519_mulx(reg u64[4] _k _u) -> reg u64[4] { - inline int i; stack u8[32] k; reg u64[4] u r; - stack u64 rps; - rps = rp; - - k = __decode_scalar(kp); - u = __decode_u_coordinate4(up); + k = __decode_scalar(_k); + u = __decode_u_coordinate4(_u); r = __curve25519_internal_mulx(k, u); - rp = rps; - for i=0 to 4 - { [rp + 8*i] = r[i]; } + return r; } -inline fn __curve25519_mulx_base(reg u64 rp kp) +inline fn __curve25519_mulx_base(reg u64[4] _k) -> reg u64[4] { - inline int i; stack u8[32] k; reg u64[4] u r; - stack u64 rps; - - rps = rp; - k = __decode_scalar(kp); + k = __decode_scalar(_k); u = __decode_u_coordinate_base4(); r = __curve25519_internal_mulx(k, u); - rp = rps; - for i=0 to 4 - { [rp + 8*i] = r[i]; } + return r; } + diff --git a/src/crypto_scalarmult/curve25519/amd64/mulx/invert4.jinc b/src/crypto_scalarmult/curve25519/amd64/mulx/invert4.jinc index 727e3bca..22cce58b 100644 --- a/src/crypto_scalarmult/curve25519/amd64/mulx/invert4.jinc +++ b/src/crypto_scalarmult/curve25519/amd64/mulx/invert4.jinc @@ -1,9 +1,6 @@ require "mul4.jinc" require "sqr4.jinc" -// TODO: -// - rewrite invert4 (& aux. functions) once reg arrays are supported in non-inline functions -// - this should reduce the size of scalarmult.s to half inline fn __invert4(reg u64[4] f) -> reg u64[4] { reg u32 i; @@ -13,90 +10,90 @@ inline fn __invert4(reg u64[4] f) -> reg u64[4] fs = #copy(f); // z2 = z1^2^1 - t0 = __sqr4_rr(f); + t0 = _sqr4_rr_(f); t0s = #copy(t0); // z8 = z2^2^2 - t1 = __sqr4_rr(t0); - t1 = __sqr4_rr(t1); + t1 = _sqr4_rr_(t0); + t1 = _sqr4_rr_(t1); // z9 = z1*z8 - t1 = __mul4_rsr(fs,t1); + t1 = _mul4_rsr_(fs,t1); t1s = #copy(t1); // z11 = z2*z9 - t0 = __mul4_rsr(t0s,t1); + t0 = _mul4_rsr_(t0s,t1); t0s = #copy(t0); // z22 = z11^2^1 - t2 = __sqr4_rr(t0); + t2 = _sqr4_rr_(t0); // z_5_0 = z9*z22 - t1 = __mul4_rsr(t1s,t2); + t1 = _mul4_rsr_(t1s,t2); t1s = #copy(t1); // z_10_5 = z_5_0^2^5 - t2 = __sqr4_rr(t1); + t2 = _sqr4_rr_(t1); i = 4/2; - t2 = __it_sqr4_x2(t2, i); + t2 = _it_sqr4_x2_(t2, i); t2s = #copy(t2); // z_10_0 = z_10_5*z_5_0 - t1 = __mul4_rsr(t1s,t2); + t1 = _mul4_rsr_(t1s,t2); t1s = #copy(t1); // z_20_10 = z_10_0^2^10 i = 10/2; - t2 = __it_sqr4_x2(t1, i); + t2 = _it_sqr4_x2_(t1, i); // z_20_0 = z_20_10*z_10_0 - t2 = __mul4_rsr(t1s,t2); + t2 = _mul4_rsr_(t1s,t2); t2s = #copy(t2); // z_40_20 = z_20_0^2^20 i = 20/2; - t3 = __it_sqr4_x2(t2, i); + t3 = _it_sqr4_x2_(t2, i); // z_40_0 = z_40_20*z_20_0 - t2 = __mul4_rsr(t2s,t3); + t2 = _mul4_rsr_(t2s,t3); // z_50_10 = z_40_0^2^10 i = 10/2; - t2 = __it_sqr4_x2(t2, i); + t2 = _it_sqr4_x2_(t2, i); // z_50_0 = z_50_10*z_10_0 - t1 = __mul4_rsr(t1s,t2); + t1 = _mul4_rsr_(t1s,t2); t1s = #copy(t1); // z_100_50 = z_50_0^2^50 i = 50/2; - t2 = __it_sqr4_x2(t1, i); + t2 = _it_sqr4_x2_(t1, i); // z_100_0 = z_100_50*z_50_0 - t2 = __mul4_rsr(t1s,t2); + t2 = _mul4_rsr_(t1s,t2); t2s = #copy(t2); // z_200_100 = z_100_0^2^100 i = 100/2; - t3 = __it_sqr4_x2(t2, i); + t3 = _it_sqr4_x2_(t2, i); // z_200_0 = z_200_100*z_100_0 - t2 = __mul4_rsr(t2s,t3); + t2 = _mul4_rsr_(t2s,t3); // z_250_50 = z_200_0^2^50 i = 50/2; - t2 = __it_sqr4_x2(t2, i); + t2 = _it_sqr4_x2_(t2, i); // z_250_0 = z_250_50*z_50_0 - t1 = __mul4_rsr(t1s,t2); + t1 = _mul4_rsr_(t1s,t2); // z_255_5 = z_250_0^2^5 i = 4/2; - t1 = __it_sqr4_x2(t1, i); - t1 = __sqr4_rr(t1); + t1 = _it_sqr4_x2_(t1, i); + t1 = _sqr4_rr_(t1); // z_255_21 = z_255_5*z11 - t1 = __mul4_rsr(t0s,t1); + t1 = _mul4_rsr_(t0s,t1); return t1; } diff --git a/src/crypto_scalarmult/curve25519/amd64/mulx/mul4.jinc b/src/crypto_scalarmult/curve25519/amd64/mulx/mul4.jinc index 0cbb33e8..54132927 100644 --- a/src/crypto_scalarmult/curve25519/amd64/mulx/mul4.jinc +++ b/src/crypto_scalarmult/curve25519/amd64/mulx/mul4.jinc @@ -168,6 +168,54 @@ inline fn __mul4_rsr(stack u64[4] fs, reg u64[4] g) -> reg u64[4] return h; } +inline fn __mul4_rpr(reg ptr u64[4] fp, reg u64[4] g) -> reg u64[4] +{ + reg bool cf of; + reg u64[4] h r; + reg u64 _38 f z; + + of, cf, _, _, _, z = #set0(); + + f = fp[0]; + h, r, cf, of = __mul4_c0( f, g, z, cf, of); + + f = fp[1]; + h, r, cf, of = __mul4_c1(h, r, f, g, z, cf, of); + + f = fp[2]; + h, r, cf, of = __mul4_c2(h, r, f, g, z, cf, of); + + f = fp[3]; + h, r, cf, of = __mul4_c3(h, r, f, g, z, cf, of); + + _38 = 38; + h = __reduce4(h, r, _38, z, cf, of); + + return h; +} + +fn _mul4_rpr(reg ptr u64[4] fp, reg u64[4] g) -> reg u64[4] +{ + reg u64[4] h; + + h = __mul4_rpr(fp, g); + + return h; +} + +inline fn _mul4_rsr_(stack u64[4] _fs, reg u64[4] _g) -> reg u64[4] +{ + reg ptr u64[4] fp; + reg u64[4] _h h g; + + fp = _fs; + g = #copy(_g); + h = _mul4_rpr(fp, g); + _h = #copy(h); + + return _h; +} + inline fn __mul4_ssr(stack u64[4] fs, reg u64[4] g) -> stack u64[4] { stack u64[4] hs; diff --git a/src/crypto_scalarmult/curve25519/amd64/mulx/scalarmult.jazz b/src/crypto_scalarmult/curve25519/amd64/mulx/scalarmult.jazz index 04672100..d6146eca 100644 --- a/src/crypto_scalarmult/curve25519/amd64/mulx/scalarmult.jazz +++ b/src/crypto_scalarmult/curve25519/amd64/mulx/scalarmult.jazz @@ -1,17 +1,39 @@ +from Jade require "crypto_scalarmult/curve25519/amd64/common/load_store4.jinc" require "curve25519.jinc" -export fn jade_scalarmult_curve25519_amd64_mulx(reg u64 q n p) -> reg u64 +export fn jade_scalarmult_curve25519_amd64_mulx(reg u64 qp np pp) -> reg u64 { reg u64 r; - __curve25519_mulx(q, n, p); + stack u64 qps; + reg u64[4] q n p; + + qps = qp; + n = __load4(np); + p = __load4(pp); + + q = __curve25519_mulx(n, p); + + qp = qps; + __store4(qp, q); + ?{}, r = #set0(); return r; } -export fn jade_scalarmult_curve25519_amd64_mulx_base(reg u64 q n) -> reg u64 +export fn jade_scalarmult_curve25519_amd64_mulx_base(reg u64 qp np) -> reg u64 { reg u64 r; - __curve25519_mulx_base(q, n); + stack u64 qps; + reg u64[4] q n; + + qps = qp; + n = __load4(np); + + q = __curve25519_mulx_base(n); + + qp = qps; + __store4(qp, q); + ?{}, r = #set0(); return r; } diff --git a/src/crypto_scalarmult/curve25519/amd64/mulx/sqr4.jinc b/src/crypto_scalarmult/curve25519/amd64/mulx/sqr4.jinc index 46fd5c62..f659aa9d 100644 --- a/src/crypto_scalarmult/curve25519/amd64/mulx/sqr4.jinc +++ b/src/crypto_scalarmult/curve25519/amd64/mulx/sqr4.jinc @@ -29,7 +29,7 @@ inline fn __sqr4_rr(reg u64[4] f) -> reg u64[4] of, h[3] = #ADOX ( h[3], t[3], of ); cf, r[0] = #ADCX ( r[0], t[4], cf ); - + (r[1], t[4]) = #MULX ( fx, f[3] ); // f1*f3 of, r[0] = #ADOX ( r[0], t[4], of ); @@ -81,6 +81,24 @@ inline fn __sqr4_rr(reg u64[4] f) -> reg u64[4] return h; } +fn _sqr4_rr(reg u64[4] f) -> reg u64[4] +{ + reg u64[4] h; + h = __sqr4_rr(f); + return h; +} + +inline fn _sqr4_rr_(reg u64[4] _f) -> reg u64[4] +{ + reg u64[4] _h h f; + + f = #copy(_f); + h = _sqr4_rr(f); + _h = #copy(h); + + return _h; +} + inline fn __it_sqr4_x2(reg u64[4] f, reg u32 i) -> reg u64[4] { reg bool zf; @@ -100,6 +118,21 @@ inline fn __it_sqr4_x2(reg u64[4] f, reg u32 i) -> reg u64[4] return f; } +fn _it_sqr4_x2(reg u64[4] f, reg u32 i) -> reg u64[4] +{ + f = __it_sqr4_x2(f, i); + return f; +} + +inline fn _it_sqr4_x2_(reg u64[4] _f, reg u32 i) -> reg u64[4] +{ + reg u64[4] f; + f = #copy(_f); + f = _it_sqr4_x2(f, i); + return f; +} + + inline fn __sqr4_ss(stack u64[4] fs) -> stack u64[4] { stack u64[4] hs; diff --git a/src/crypto_scalarmult/curve25519/amd64/ref4/curve25519.jinc b/src/crypto_scalarmult/curve25519/amd64/ref4/curve25519.jinc index 4663cfcd..47c40757 100644 --- a/src/crypto_scalarmult/curve25519/amd64/ref4/curve25519.jinc +++ b/src/crypto_scalarmult/curve25519/amd64/ref4/curve25519.jinc @@ -1,10 +1,11 @@ from Jade require "crypto_scalarmult/curve25519/amd64/common/bit.jinc" -from Jade require "crypto_scalarmult/curve25519/amd64/common/decode.jinc" +from Jade require "crypto_scalarmult/curve25519/amd64/common/decode_scalar.jinc" +from Jade require "crypto_scalarmult/curve25519/amd64/common/64/decode_u4.jinc" +from Jade require "crypto_scalarmult/curve25519/amd64/common/64/init_points4.jinc" from Jade require "crypto_scalarmult/curve25519/amd64/common/64/add4.jinc" from Jade require "crypto_scalarmult/curve25519/amd64/common/64/sub4.jinc" from Jade require "crypto_scalarmult/curve25519/amd64/common/64/cswap4.jinc" -from Jade require "crypto_scalarmult/curve25519/amd64/common/64/load4.jinc" from Jade require "crypto_scalarmult/curve25519/amd64/common/64/tobytes4.jinc" require "mul4.jinc" @@ -142,38 +143,27 @@ inline fn __curve25519_internal_ref4(stack u8[32] k, reg u64[4] u) -> reg u64[4] return r; } -inline fn __curve25519_ref4(reg u64 rp kp up) +inline fn __curve25519_ref4(reg u64[4] _k _u) -> reg u64[4] { - inline int i; stack u8[32] k; reg u64[4] u r; - stack u64 rps; - rps = rp; - - k = __decode_scalar(kp); - u = __decode_u_coordinate4(up); + k = __decode_scalar(_k); + u = __decode_u_coordinate4(_u); r = __curve25519_internal_ref4(k, u); - rp = rps; - for i=0 to 4 - { [rp + 8*i] = r[i]; } + return r; } -inline fn __curve25519_ref4_base(reg u64 rp kp) +inline fn __curve25519_ref4_base(reg u64[4] _k) -> reg u64[4] { - inline int i; stack u8[32] k; reg u64[4] u r; - stack u64 rps; - - rps = rp; - k = __decode_scalar(kp); + k = __decode_scalar(_k); u = __decode_u_coordinate_base4(); r = __curve25519_internal_ref4(k, u); - rp = rps; - for i=0 to 4 - { [rp + 8*i] = r[i]; } + return r; } + diff --git a/src/crypto_scalarmult/curve25519/amd64/ref4/scalarmult.jazz b/src/crypto_scalarmult/curve25519/amd64/ref4/scalarmult.jazz index 9fb9ac00..7750b8c6 100644 --- a/src/crypto_scalarmult/curve25519/amd64/ref4/scalarmult.jazz +++ b/src/crypto_scalarmult/curve25519/amd64/ref4/scalarmult.jazz @@ -1,17 +1,39 @@ +from Jade require "crypto_scalarmult/curve25519/amd64/common/load_store4.jinc" require "curve25519.jinc" -export fn jade_scalarmult_curve25519_amd64_ref4(reg u64 q n p) -> reg u64 +export fn jade_scalarmult_curve25519_amd64_ref4(reg u64 qp np pp) -> reg u64 { reg u64 r; - __curve25519_ref4(q, n, p); + stack u64 qps; + reg u64[4] q n p; + + qps = qp; + n = __load4(np); + p = __load4(pp); + + q = __curve25519_ref4(n, p); + + qp = qps; + __store4(qp, q); + ?{}, r = #set0(); return r; } -export fn jade_scalarmult_curve25519_amd64_ref4_base(reg u64 q n) -> reg u64 +export fn jade_scalarmult_curve25519_amd64_ref4_base(reg u64 qp np) -> reg u64 { reg u64 r; - __curve25519_ref4_base(q, n); + stack u64 qps; + reg u64[4] q n; + + qps = qp; + n = __load4(np); + + q = __curve25519_ref4_base(n); + + qp = qps; + __store4(qp, q); + ?{}, r = #set0(); return r; } diff --git a/src/crypto_scalarmult/curve25519/amd64/ref5/curve25519.jinc b/src/crypto_scalarmult/curve25519/amd64/ref5/curve25519.jinc index 681969bd..6963fcde 100644 --- a/src/crypto_scalarmult/curve25519/amd64/ref5/curve25519.jinc +++ b/src/crypto_scalarmult/curve25519/amd64/ref5/curve25519.jinc @@ -1,10 +1,11 @@ from Jade require "crypto_scalarmult/curve25519/amd64/common/bit.jinc" -from Jade require "crypto_scalarmult/curve25519/amd64/common/decode.jinc" +from Jade require "crypto_scalarmult/curve25519/amd64/common/decode_scalar.jinc" +from Jade require "crypto_scalarmult/curve25519/amd64/common/51/decode_u5.jinc" +from Jade require "crypto_scalarmult/curve25519/amd64/common/51/init_points5.jinc" from Jade require "crypto_scalarmult/curve25519/amd64/common/51/add5.jinc" from Jade require "crypto_scalarmult/curve25519/amd64/common/51/sub5.jinc" from Jade require "crypto_scalarmult/curve25519/amd64/common/51/cswap5.jinc" -from Jade require "crypto_scalarmult/curve25519/amd64/common/51/load5.jinc" from Jade require "crypto_scalarmult/curve25519/amd64/common/51/tobytes5.jinc" require "mul5.jinc" @@ -142,40 +143,29 @@ inline fn __curve25519_internal_ref5(stack u8[32] k, reg u64[5] u) -> reg u64[4] return r; } -inline fn __curve25519_ref5(reg u64 rp kp up) +inline fn __curve25519_ref5(reg u64[4] _k _u) -> reg u64[4] { - inline int i; stack u8[32] k; reg u64[5] u; reg u64[4] r; - stack u64 rps; - rps = rp; - k = __decode_scalar(kp); - u = __decode_u_coordinate5(up); + k = __decode_scalar(_k); + u = __decode_u_coordinate5(_u); r = __curve25519_internal_ref5(k, u); - rp = rps; - for i=0 to 4 - { [rp + 8*i] = r[i]; } + return r; } -inline fn __curve25519_ref5_base(reg u64 rp kp) +inline fn __curve25519_ref5_base(reg u64[4] _k) -> reg u64[4] { - inline int i; stack u8[32] k; reg u64[5] u; reg u64[4] r; - stack u64 rps; - rps = rp; - k = __decode_scalar(kp); + k = __decode_scalar(_k); u = __decode_u_coordinate_base5(); r = __curve25519_internal_ref5(k, u); - rp = rps; - for i=0 to 4 - { [rp + 8*i] = r[i]; } + return r; } - diff --git a/src/crypto_scalarmult/curve25519/amd64/ref5/scalarmult.jazz b/src/crypto_scalarmult/curve25519/amd64/ref5/scalarmult.jazz index f171a571..88556ba3 100644 --- a/src/crypto_scalarmult/curve25519/amd64/ref5/scalarmult.jazz +++ b/src/crypto_scalarmult/curve25519/amd64/ref5/scalarmult.jazz @@ -1,19 +1,40 @@ +from Jade require "crypto_scalarmult/curve25519/amd64/common/load_store4.jinc" require "curve25519.jinc" -export fn jade_scalarmult_curve25519_amd64_ref5(reg u64 q n p) -> reg u64 +export fn jade_scalarmult_curve25519_amd64_ref5(reg u64 qp np pp) -> reg u64 { reg u64 r; - __curve25519_ref5(q, n, p); + stack u64 qps; + reg u64[4] q n p; + + qps = qp; + n = __load4(np); + p = __load4(pp); + + q = __curve25519_ref5(n, p); + + qp = qps; + __store4(qp, q); + ?{}, r = #set0(); return r; } -export fn jade_scalarmult_curve25519_amd64_ref5_base(reg u64 q n) -> reg u64 +export fn jade_scalarmult_curve25519_amd64_ref5_base(reg u64 qp np) -> reg u64 { reg u64 r; - __curve25519_ref5_base(q, n); + stack u64 qps; + reg u64[4] q n; + + qps = qp; + n = __load4(np); + + q = __curve25519_ref5_base(n); + + qp = qps; + __store4(qp, q); + ?{}, r = #set0(); return r; } -