From 0b6f740ceae03b54282bc2ae940a3f32f3abaa98 Mon Sep 17 00:00:00 2001 From: Patrick Longa Date: Thu, 4 Feb 2021 17:18:34 -0800 Subject: [PATCH] Optimizing xDBLADD, some minor editing --- src/compression/dlog.c | 25 +++++++++---------------- src/ec_isogeny.c | 39 +++++++++++++++++++-------------------- src/fpx.c | 2 +- 3 files changed, 29 insertions(+), 37 deletions(-) diff --git a/src/compression/dlog.c b/src/compression/dlog.c index 36786db..34e45c7 100644 --- a/src/compression/dlog.c +++ b/src/compression/dlog.c @@ -32,8 +32,7 @@ void from_base(int *D, digit_t *r, int Dlen, int base) digit[0] = (digit_t)(-D[i]); if ((base & 1) == 0) { Montgomery_neg(digit, (digit_t*)Alice_order); - } - else { + } else { mp_sub((digit_t*)Bob_order, digit, digit, NWORDS_ORDER); } } else { @@ -70,8 +69,7 @@ void from_base(int *D, digit_t *r, int Dlen, int base) digit[0] = (digit_t)(-D[0]); if ((base & 1) == 0) { Montgomery_neg(digit, (digit_t*)Alice_order); - } - else { + } else { mp_sub((digit_t*)Bob_order, digit, digit, NWORDS_ORDER); } } else { @@ -88,7 +86,6 @@ void from_base(int *D, digit_t *r, int Dlen, int base) #ifdef COMPRESSED_TABLES - #ifdef ELL2_TORUS int ord2w_dlog(const felm_t *r, const int *logT, const felm_t *Texp) @@ -97,11 +94,9 @@ int ord2w_dlog(const felm_t *r, const int *logT, const felm_t *Texp) // Output: corresponding digit d in [-2^{w1-1},2^{w1-1}] felm_t x, y; felm_t sum = {0}, prods[1<<(W_2_1-1)] = {0}; - f2elm_t tmp; fpcopy(r[0], x); fpcopy(r[1], y); - fpcorrection(x); fpcorrection(y); @@ -112,11 +107,10 @@ int ord2w_dlog(const felm_t *r, const int *logT, const felm_t *Texp) fpneg(sum); fpcorrection(sum); if (memcmp(x, sum, NBITS_TO_NBYTES(NBITS_FIELD)) == 0) return logT[2]; - for (int j = 2; j < W_2; ++j) - { - for (int i = 0; i < (1<<(j-1)); ++i) - { - if ((i % 2) == 0) fpmul_mont(y, Texp[(1<<(j-2)) + (i/2) - 1], prods[(1<<(j-2)) + (i/2) - 1]); + for (int j = 2; j < W_2; ++j) { + for (int i = 0; i < (1<<(j-1)); ++i) { + if ((i % 2) == 0) + 
fpmul_mont(y, Texp[(1<<(j-2)) + (i/2) - 1], prods[(1<<(j-2)) + (i/2) - 1]); fpcopy(y, sum); for (int k = 0; k <= j-2; ++k) { if (((i>>(j-k-2)) % 2) == 0) @@ -142,9 +136,9 @@ int ord2w_dlog(const felm_t *r, const int *logT, const felm_t *Texp) // Output: The signed digit D in {-ell^(w-1), ..., ell^(w-1)} int ord2w_dloghyb(const felm_t *h, const int *logT, const felm_t *Texp, const felm_t *G) { - int k = 0, d = 0, index = 0, ord = 0, tmp = 0, w = W_2, w2 = w - W_2_1, i_j = 0, t, pow0, pow1; + int k = 0, d = 0, index = 0, ord = 0, tmp = 0, w = W_2, w2 = w - W_2_1, i_j = 0, t; uint8_t inv = 0, flag = 0; - f2elm_t H[W_2_1] = {0}, tmp2; // Size of H should be max of {W_2_1, W_2 - W_2_1} + f2elm_t H[W_2_1] = {0}; // Size of H should be max of {W_2_1, W_2 - W_2_1} felm_t one = {0}; fpcopy((digit_t*)&Montgomery_one, one); @@ -152,8 +146,7 @@ int ord2w_dloghyb(const felm_t *h, const int *logT, const felm_t *Texp, const fe fpcorrection(H[0][0]); fpcorrection(H[0][1]); - for (int i = 1; i <= w2; ++i) - { + for (int i = 1; i <= w2; ++i) { if (!is_felm_zero(H[0][1])) { // check if first compressed Fp2 element in H is NOT the identity for (int j = k; j >= 0; j--) fp2copy(H[j], H[j+1]); sqr_Fp2_cycl_proj(H[0]); diff --git a/src/ec_isogeny.c b/src/ec_isogeny.c index 42ecd66..edd449e 100644 --- a/src/ec_isogeny.c +++ b/src/ec_isogeny.c @@ -273,26 +273,25 @@ void xDBLADD(point_proj_t P, point_proj_t Q, const f2elm_t XPQ, const f2elm_t ZP // Output: projective Montgomery points P <- 2*P = (X2P:Z2P) such that x(2P)=X2P/Z2P, and Q <- P+Q = (XQP:ZQP) such that = x(Q+P)=XQP/ZQP. 
f2elm_t t0, t1, t2; - fp2add(P->X, P->Z, t0); // t0 = XP+ZP - fp2sub(P->X, P->Z, t1); // t1 = XP-ZP - fp2sqr_mont(t0, P->X); // XP = (XP+ZP)^2 - fp2sub(Q->X, Q->Z, t2); // t2 = XQ-ZQ - fp2correction(t2); - fp2add(Q->X, Q->Z, Q->X); // XQ = XQ+ZQ - fp2mul_mont(t0, t2, t0); // t0 = (XP+ZP)*(XQ-ZQ) - fp2sqr_mont(t1, P->Z); // ZP = (XP-ZP)^2 - fp2mul_mont(t1, Q->X, t1); // t1 = (XP-ZP)*(XQ+ZQ) - fp2sub(P->X, P->Z, t2); // t2 = (XP+ZP)^2-(XP-ZP)^2 - fp2mul_mont(P->X, P->Z, P->X); // XP = (XP+ZP)^2*(XP-ZP)^2 - fp2mul_mont(t2, A24, Q->X); // XQ = A24*[(XP+ZP)^2-(XP-ZP)^2] - fp2sub(t0, t1, Q->Z); // ZQ = (XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ) - fp2add(Q->X, P->Z, P->Z); // ZP = A24*[(XP+ZP)^2-(XP-ZP)^2]+(XP-ZP)^2 - fp2add(t0, t1, Q->X); // XQ = (XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ) - fp2mul_mont(P->Z, t2, P->Z); // ZP = [A24*[(XP+ZP)^2-(XP-ZP)^2]+(XP-ZP)^2]*[(XP+ZP)^2-(XP-ZP)^2] - fp2sqr_mont(Q->Z, Q->Z); // ZQ = [(XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)]^2 - fp2sqr_mont(Q->X, Q->X); // XQ = [(XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ)]^2 - fp2mul_mont(Q->X, ZPQ, Q->X); // XQ = ZPQ*[(XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ)]^2 - fp2mul_mont(Q->Z, XPQ, Q->Z); // ZQ = XPQ*[(XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)]^2 + mp2_add(P->X, P->Z, t0); // t0 = XP+ZP + mp2_sub_p2(P->X, P->Z, t1); // t1 = XP-ZP + fp2sqr_mont(t0, P->X); // XP = (XP+ZP)^2 + mp2_sub_p2(Q->X, Q->Z, t2); // t2 = XQ-ZQ + mp2_add(Q->X, Q->Z, Q->X); // XQ = XQ+ZQ + fp2mul_mont(t0, t2, t0); // t0 = (XP+ZP)*(XQ-ZQ) + fp2sqr_mont(t1, P->Z); // ZP = (XP-ZP)^2 + fp2mul_mont(t1, Q->X, t1); // t1 = (XP-ZP)*(XQ+ZQ) + mp2_sub_p2(P->X, P->Z, t2); // t2 = (XP+ZP)^2-(XP-ZP)^2 + fp2mul_mont(P->X, P->Z, P->X); // XP = (XP+ZP)^2*(XP-ZP)^2 + fp2mul_mont(A24, t2, Q->X); // XQ = A24*[(XP+ZP)^2-(XP-ZP)^2] + mp2_sub_p2(t0, t1, Q->Z); // ZQ = (XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ) + mp2_add(Q->X, P->Z, P->Z); // ZP = A24*[(XP+ZP)^2-(XP-ZP)^2]+(XP-ZP)^2 + mp2_add(t0, t1, Q->X); // XQ = (XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ) + fp2mul_mont(P->Z, t2, P->Z); // ZP = 
[A24*[(XP+ZP)^2-(XP-ZP)^2]+(XP-ZP)^2]*[(XP+ZP)^2-(XP-ZP)^2] + fp2sqr_mont(Q->Z, Q->Z); // ZQ = [(XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)]^2 + fp2sqr_mont(Q->X, Q->X); // XQ = [(XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ)]^2 + fp2mul_mont(Q->Z, XPQ, Q->Z); // ZQ = XPQ*[(XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)]^2 + fp2mul_mont(Q->X, ZPQ, Q->X); // XQ = ZPQ*[(XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ)]^2 } diff --git a/src/fpx.c b/src/fpx.c index 72629f8..09cbc34 100644 --- a/src/fpx.c +++ b/src/fpx.c @@ -1558,7 +1558,7 @@ int reverse_bits(int t, unsigned int nbits) x >>= 1; bits++; } - while (bits < nbits) { + while ((unsigned int)bits < nbits) { r <<= 1; bits++; }