diff --git a/src/compression/dlog.c b/src/compression/dlog.c index 4e113d5..36786db 100644 --- a/src/compression/dlog.c +++ b/src/compression/dlog.c @@ -8,7 +8,7 @@ void from_base(int *D, digit_t *r, int Dlen, int base) { // Convert a number in base "base" with signed digits: (D[k-1]D[k-2]...D[1]D[0])_base < 2^(NWORDS_ORDER*RADIX) into decimal - // Output: r = D[k-1]*base^(k-1) + ... + D[1]*base + D[0] + // Output: r = (D[k-1]*base^(k-1) + ... + D[1]*base + D[0])_10 digit_t ell[NWORDS_ORDER] = {0}, digit[NWORDS_ORDER] = {0}, temp[NWORDS_ORDER] = {0}; int ellw; @@ -107,11 +107,11 @@ int ord2w_dlog(const felm_t *r, const int *logT, const felm_t *Texp) if (is_felm_zero(y)) return 0; if (is_felm_zero(x)) return logT[0]; - if (ct_compare((unsigned char *)x, (unsigned char *)y, NBITS_TO_NBYTES(NBITS_FIELD)) == 0) return logT[1]; + if (memcmp(x, y, NBITS_TO_NBYTES(NBITS_FIELD)) == 0) return logT[1]; fpcopy(y, sum); fpneg(sum); fpcorrection(sum); - if (ct_compare((unsigned char *)x, (unsigned char *)sum, NBITS_TO_NBYTES(NBITS_FIELD)) == 0) return logT[2]; + if (memcmp(x, sum, NBITS_TO_NBYTES(NBITS_FIELD)) == 0) return logT[2]; for (int j = 2; j < W_2; ++j) { for (int i = 0; i < (1<<(j-1)); ++i) @@ -125,12 +125,12 @@ int ord2w_dlog(const felm_t *r, const int *logT, const felm_t *Texp) fpsub(sum, prods[(1<> (j-k-1)) - 1], sum); } fpcorrection(sum); - if (ct_compare((unsigned char *)x, (unsigned char *)sum, NBITS_TO_NBYTES(NBITS_FIELD)) == 0) { + if (memcmp(x, sum, NBITS_TO_NBYTES(NBITS_FIELD)) == 0) { return logT[(1<> 1)], H[j][1], one); } fpcorrection(one); - if (ct_compare((unsigned char *)H[j][0], (unsigned char *)one, NBITS_TO_NBYTES(NBITS_FIELD)) != 0) { + if (memcmp(H[j][0], one, NBITS_TO_NBYTES(NBITS_FIELD)) != 0) { d += 1 << (w-1); i_j++; flag = 1; @@ -296,18 +296,18 @@ void Traverse_w_div_e_fullsigned(const f2elm_t r, int j, int k, int z, const uns fp2copy(r, rp); fp2correction(rp); - if (is_felm_zero(rp[1]) && ct_compare((unsigned char *)rp[0],(unsigned char *)&Montgomery_one,NBITS_TO_NBYTES(NBITS_FIELD)) == 0) { + if (is_felm_zero(rp[1]) && memcmp(rp[0],&Montgomery_one,NBITS_TO_NBYTES(NBITS_FIELD)) == 0) { D[k] = 0; } else { for (int t = 1; t <= ellw/2; t++) { - if (ct_compare((unsigned char*)rp, (unsigned char*)&CT[2*((Dlen - 1)*(ellw/2) + (t-1))], 2*NBITS_TO_NBYTES(NBITS_FIELD)) == 0) { + if (memcmp(rp, CT[2*((Dlen - 1)*(ellw/2) + (t-1))], 2*NBITS_TO_NBYTES(NBITS_FIELD)) == 0) { D[k] = -t; break; } else { fp2copy(CT + 2*((Dlen - 1)*(ellw/2) + (t-1)), alpha); fpneg(alpha[1]); fpcorrection(alpha[1]); - if (ct_compare((unsigned char*)rp, (unsigned char*)alpha, 2*NBITS_TO_NBYTES(NBITS_FIELD)) == 0) { + if (memcmp(rp, alpha, 2*NBITS_TO_NBYTES(NBITS_FIELD)) == 0) { D[k] = t; break; } @@ -367,19 +367,19 @@ void Traverse_w_notdiv_e_fullsigned(const f2elm_t r, int j, int k, int z, const fp2copy(r, rp); fp2correction(rp); - if (is_felm_zero(rp[1]) && ct_compare((unsigned char *)rp[0],(unsigned char *)&Montgomery_one,NBITS_TO_NBYTES(NBITS_FIELD)) == 0) { + if (is_felm_zero(rp[1]) && memcmp(rp[0],&Montgomery_one,NBITS_TO_NBYTES(NBITS_FIELD)) == 0) { D[k] = 0; } else { if (!(j == 0 && k == Dlen - 1)) { for (int t = 1; t <= (ellw/2); t++) { - if (ct_compare((unsigned char*)&CT2[2*(ellw/2)*(Dlen-1) + 2*(t-1)], (unsigned char*)rp, 2*NBITS_TO_NBYTES(NBITS_FIELD)) == 0) { + if (memcmp(CT2[2*(ellw/2)*(Dlen-1) + 2*(t-1)], rp, 2*NBITS_TO_NBYTES(NBITS_FIELD)) == 0) { D[k] = -t; break; } else { fp2copy(CT2 + 2*((ellw/2)*(Dlen-1) + (t-1)), alpha); fpneg(alpha[1]); fpcorrection(alpha[1]); - if (ct_compare((unsigned char*)rp, (unsigned char*)alpha, 2*NBITS_TO_NBYTES(NBITS_FIELD)) == 0) { + if (memcmp(rp, alpha, 2*NBITS_TO_NBYTES(NBITS_FIELD)) == 0) { D[k] = t; break; } @@ -387,14 +387,14 @@ void Traverse_w_notdiv_e_fullsigned(const f2elm_t r, int j, int k, int z, const } } else { for (int t = 1; t <= ell_emodw/2; t++) { - if (ct_compare((unsigned char*)&CT1[2*(ellw/2)*(Dlen - 1) + 2*(t-1)], (unsigned char*)rp, 2*NBITS_TO_NBYTES(NBITS_FIELD)) == 0) { + if (memcmp(CT1[2*(ellw/2)*(Dlen - 1) + 2*(t-1)], rp, 2*NBITS_TO_NBYTES(NBITS_FIELD)) == 0) { D[k] = -t; break; } else { fp2copy(CT1 + 2*((ellw/2)*(Dlen-1) + (t-1)), alpha); fpneg(alpha[1]); fpcorrection(alpha[1]); - if (ct_compare((unsigned char*)rp, (unsigned char*)alpha, 2*NBITS_TO_NBYTES(NBITS_FIELD)) == 0) { + if (memcmp(rp, alpha, 2*NBITS_TO_NBYTES(NBITS_FIELD)) == 0) { D[k] = t; break; } @@ -430,19 +430,3 @@ void solve_dlog(const f2elm_t r, int *D, digit_t* d, int ell) } -void ph2(const point_full_proj_t phiP, const point_full_proj_t phiQ, const point_t PS, const point_t QS, const f2elm_t A, digit_t* c0, digit_t* d0, digit_t* c1, digit_t* d1) -{ // Computes the 4 coefficients of the change of basis matrix between the bases {phiP,phiQ} and {PS, QS} - // Assume both bases generate the full 2^eA torsion - f2elm_t n[4] = {0}; - int D[DLEN_2]; - - // Compute the four pairings - Tate_4_pairings_2_torsion(phiP, phiQ, PS, QS, A, n); - - solve_dlog(n[0], D, d0, 2); - solve_dlog(n[2], D, c0, 2); - mp_sub((digit_t*)Alice_order, c0, c0, NWORDS_ORDER); - solve_dlog(n[1], D, d1, 2); - solve_dlog(n[3], D, c1, 2); - mp_sub((digit_t*)Alice_order, c1, c1, NWORDS_ORDER); -} \ No newline at end of file diff --git a/src/compression/pairing.c b/src/compression/pairing.c index 09132a5..23e1374 100644 --- a/src/compression/pairing.c +++ b/src/compression/pairing.c @@ -7,51 +7,6 @@ #define t_points 2 -void get_A_compression(const f2elm_t xP, const f2elm_t xQ, const f2elm_t xR, f2elm_t A) -{ // Given the x-coordinates of P, Q, and R, returns the value A corresponding to the Montgomery curve E_A: y^2=x^3+A*x^2+x such that R=Q-P on E_A. - // Input: the x-coordinates xP, xQ, and xR of the points P, Q and R. - // Output: the coefficient A corresponding to the curve E_A: y^2=x^3+A*x^2+x. - f2elm_t t0, t1, one = {0}; - - fpcopy((digit_t*)&Montgomery_one, one[0]); - fp2add(xP, xQ, t1); // t1 = xP+xQ - fp2mul_mont(xP, xQ, t0); // t0 = xP*xQ - fp2mul_mont(xR, t1, A); // A = xR*t1 - fp2add(t0, A, A); // A = A+t0 - fp2mul_mont(t0, xR, t0); // t0 = t0*xR - fp2sub(A, one, A); // A = A-1 - fp2add(t0, t0, t0); // t0 = t0+t0 - fp2add(t1, xR, t1); // t1 = t1+xR - fp2add(t0, t0, t0); // t0 = t0+t0 - fp2sqr_mont(A, A); // A = A^2 - fp2inv_mont_bingcd(t0); // t0 = 1/t0 - fp2mul_mont(A, t0, A); // A = A*t0 - fp2sub(A, t1, A); // Afinal = A-t1 -} - - -void recover_y(const publickey_t PK, point_full_proj_t phiP, point_full_proj_t phiQ, point_full_proj_t phiX, f2elm_t A) -{ // Recover the y-coordinates of the public key - // The three resulting points are (simultaneously) correct up to sign - f2elm_t tmp, phiXY, one = {0}; - - fpcopy((digit_t*)&Montgomery_one, one[0]); - get_A_compression(PK[0], PK[1], PK[2], A); // NOTE: don't have to compress this, can output in keygen - - fp2add(PK[2], A, tmp); - fp2mul_mont(PK[2], tmp, tmp); - fp2add(tmp, one, tmp); - fp2mul_mont(PK[2], tmp, tmp); // tmp = PK[2]^3+A*PK[2]^2+PK[2]; - sqrt_Fp2(tmp, phiXY); - fp2copy(PK[2], phiX->X); - fp2copy(phiXY, phiX->Y); - fp2copy(one, phiX->Z); // phiX = [PK[2],phiXY,1]; - - recover_os(PK[1], one, PK[0], one, PK[2], phiXY, A, phiQ->X, phiQ->Y, phiQ->Z); - fp2neg(phiXY); - recover_os(PK[0], one, PK[1], one, PK[2], phiXY, A, phiP->X, phiP->Y, phiP->Z); -} - static void final_exponentiation_2_torsion(f2elm_t f, const f2elm_t finv, f2elm_t fout) { // The final exponentiation for pairings in the 2^eA-torsion group. Raising the value f to the power (p^2-1)/2^eA. @@ -72,171 +27,6 @@ static void final_exponentiation_2_torsion(f2elm_t f, const f2elm_t finv, f2elm_ } -void Tate_pairings_2_torsion(const point_full_proj_t P, point_full_proj_t *Qj, f2elm_t a, f2elm_t* n) -{ // Compute the reduced Tate pairings e_{2^m}(P, Q_j) for the curve y^2 = x^3 + a*x + b: - f2elm_t h[t_points], one = {0}; - f2elm_t X, Y, Z, X2, Y2, Y4, M, S, T, temp; - f2elm_t Xp, Yp, Zp, Tp; - f2elm_t L, W, g; - - fpcopy((digit_t*)&Montgomery_one, one[0]); - fp2copy(P->X, X); - fp2copy(P->Y, Y); - fp2copy(P->Z, Z); - fp2sqr_mont(Z, T); - - for (int j = 0; j < t_points; j++) { - fp2copy(one, n[j]); - fp2mul_mont(T, Qj[j]->X, temp); - fp2sub(temp, X, h[j]); - } - - for (int k = 0; k < OALICE_BITS; k++) { - // Point doubling and line function construction: - fp2sqr_mont(X, X2); - fp2sqr_mont(Y, Y2); - fp2sqr_mont(Y2, Y4); - fp2sqr_mont(T, temp); - fp2mul_mont(a, temp, temp); - fp2add(X2, X2, M); - fp2add(M, X2, M); - fp2add(M, temp, M); // M = 3*X_2 + a*T^2 - fp2add(X, Y2, S); - fp2sqr_mont(S, S); - fp2sub(S, X2, S); - fp2sub(S, Y4, S); - fp2add(S, S, S); // S = 2*((X + Y2)^2 - X2 - Y4) - fp2sqr_mont(M, temp); - fp2add(S, S, Xp); - fp2sub(temp, Xp, Xp); // Xp = M^2 - 2*S - fp2sub(S, Xp, temp); - fp2mul_mont(M, temp, temp); - fp2shl(Y4, 3, Yp); - fp2sub(temp, Yp, Yp); // Yp = M*(S - Xp) - 8*Y4 - fp2add(Y, Z, temp); - fp2sqr_mont(temp, temp); - fp2sub(temp, Y2, temp); - fp2sub(temp, T, Zp); // Zp = (Y + Z)^2 - Y2 - T - fp2sqr_mont(Zp, Tp); // Tp = Zp^2 - fp2mul_mont(Zp, T, L); // L = Zp*T - fp2add(Y2, Y2, W); // W = 2*Y2 - - fp2correction(Zp); - if (is_felm_zero(Zp[0]) && is_felm_zero(Zp[1])) { // Doubling exception for points in 2*E - fp2copy(one, Xp); - fp2copy(one, Yp); - } - - // Line function evaluation and accumulation: - for (int j = 0; j < t_points; j++) { - if (!is_felm_zero(Zp[0]) || !is_felm_zero(Zp[1])) { - fp2mul_mont(M, h[j], temp); - fp2add(temp, W, temp); - fp2mul_mont(L, Qj[j]->Y, g); - fp2sub(temp, g, g); // g = M*hj + W - L*Y_{Qj} - fp2mul_mont(Tp, Qj[j]->X, temp); - fp2sub(temp, Xp, h[j]); // hj = Tp*X_{Qj} - Xp - fp2_conj(h[j], temp); - fp2mul_mont(temp, g, g); // g = g*hj^* - } else { - fp2copy(h[j], g); - } - fp2sqr_mont(n[j], n[j]); - fp2mul_mont(n[j], g, n[j]); - } - fp2copy(Xp, X); - fp2copy(Yp, Y); - fp2copy(Zp, Z); - fp2copy(Tp, T); - } - - // Final exponentiation: - mont_n_way_inv(n, t_points, h); - for (int j = 0; j < t_points; j++) { - fp2correction(Qj[j]->Z); - if (is_felm_zero(Qj[j]->Z[0]) && is_felm_zero(Qj[j]->Z[1])) - fp2copy(one, n[j]); - else { - final_exponentiation_2_torsion(n[j], h[j], n[j]); - } - } -} - - -void Monty2Weier(const f2elm_t A, f2elm_t a, f2elm_t b) -{ // Convert a Montgomery curve EM: y^2 = x^3 + A*x^2 + x into its short Weierstrass form EW: v^2 = u^3 + a*u + b. - f2elm_t one = {0}, temp, A2, AA; - - fpcopy((digit_t*)&Montgomery_one, one[0]); - // a = 1 - A^2/3 - fp2sqr_mont(A, A2); - fpmul_mont(A2[0], (digit_t*)threeinv, temp[0]); - fpmul_mont(A2[1], (digit_t*)threeinv, temp[1]); - fp2sub(one, temp, a); - - // b = (2*A^3 - 9*A)/27 - fp2add(A, A, AA); // AA = 2*A - fp2mul_mont(AA, A2, temp); // temp = 2A^3 - fp2add(AA, AA, A2); // A2 = 4*A - fp2add(A2, A2, A2); // A2 = 8*A - fp2add(A2, A, A2); // A2 = 9*A - fp2sub(temp, A2, b); // b = 2A^3 - 9*A - fpmul_mont((digit_t*)threeinv, (digit_t*)threeinv, temp[0]); - fpmul_mont(temp[0], (digit_t*)threeinv, temp[0]); - fpmul_mont(temp[0], b[0], b[0]); - fpmul_mont(temp[0], b[1], b[1]); -} - - -void PointMonty2Weier(const point_full_proj_t PM, point_full_proj_t PW, const f2elm_t A) -{ // Convert a point PM on a Montgomery curve y^2 = x^3 + A*x^2 + x into the corresponding point on its short Weierstrass form EW. - f2elm_t zero = {0}, one = {0}, temp; - - fpcopy((digit_t*)&Montgomery_one, one[0]); - - if (is_felm_zero(PM->Z[0]) && is_felm_zero(PM->Z[1])) { - fp2copy(zero,PW->X); - fp2copy(one, PW->Y); - fp2copy(zero,PW->Z); - return; - } - fpmul_mont((digit_t*)threeinv, A[0], temp[0]); - fpmul_mont((digit_t*)threeinv, A[1], temp[1]); - - // PW = EW(PM->X + A/3, PM->Y, 1) - fp2add(temp, PM->X, PW->X); - fp2copy(PM->Y, PW->Y); - fp2copy(one, PW->Z); -} - - -void Tate_4_pairings_2_torsion(const point_full_proj_t P, const point_full_proj_t Q, const point_t S1, const point_t S2, const f2elm_t A, f2elm_t* n) -{ // The doubling only 2-torsion Tate pairing of order 2^eA, consisting of the doubling only Miller loop and the final exponentiation.] - // Computes 4 pairings at once: e(P, S1), e(P, S2), e(Q, S1), e(Q, S2). - point_full_proj_t Qj[2], PW, QW, QjW[2]; - f2elm_t a, b, one = {0}; - - fpcopy((digit_t*)&Montgomery_one, one[0]); - Monty2Weier(A, a, b); - - // Assume S1 and S2 are normalized - fp2copy(S1->x, Qj[0]->X); - fp2copy(S1->y, Qj[0]->Y); - fp2copy(one, Qj[0]->Z); - fp2copy(S2->x, Qj[1]->X); - fp2copy(S2->y, Qj[1]->Y); - fp2copy(one, Qj[1]->Z); - - PointMonty2Weier(P, PW, A); - PointMonty2Weier(Q, QW, A); - PointMonty2Weier(Qj[0], QjW[0], A); - PointMonty2Weier(Qj[1], QjW[1], A); - - Tate_pairings_2_torsion(PW, QjW, a, n); - Tate_pairings_2_torsion(QW, QjW, a, n + 2); -} - - static void final_exponentiation_3_torsion(f2elm_t f, const f2elm_t finv, f2elm_t fout) { // The final exponentiation for pairings in the 3-torsion group. Raising the value f to the power (p^2-1)/3^eB. felm_t one = {0}; @@ -256,129 +46,6 @@ static void final_exponentiation_3_torsion(f2elm_t f, const f2elm_t finv, f2elm_ } -void Tate_pairings_3_torsion(const point_full_proj_t P, point_full_proj_t *Qj, f2elm_t a, f2elm_t* n) -{ // The tripling only 3-torsion Tate pairing of order 3^eB, consisting of the tripling only Miller loop and the final exponentiation. - // Computes 4 pairings at once: e(P, R1), e(P, R2), e(Q, R1), e(Q, R2). - f2elm_t h[t_points], one = {0}; - f2elm_t X, Y, Z, X2, Y2, Y4, M, S, T; - f2elm_t Xp, Yp, Zp, Tp, D, U, Up, Fp; - f2elm_t L, W, Wp, g, T2, M2, F, F2, d; - f2elm_t temp, temp1; - - fpcopy((digit_t*)&Montgomery_one, one[0]); - fp2copy(P->X, X); - fp2copy(P->Y, Y); - fp2copy(P->Z, Z); - fp2sqr_mont(Z, T); - - for (int j = 0; j < t_points; j++) { - fp2copy(one, n[j]); - fp2mul_mont(T, Qj[j]->X, temp); - fp2sub(temp, X, h[j]); - } - - for (int k = 0; k < OBOB_EXPON; k++) { - // Point tripling and parabola function construction: - fp2sqr_mont(X, X2); - fp2sqr_mont(Y, Y2); - fp2sqr_mont(Y2, Y4); - fp2sqr_mont(T, T2); - fp2add(X2, X2, temp); - fp2add(temp, X2, temp); - fp2mul_mont(a, T2, M); - fp2add(temp, M, M); // M = 3*X2 + a*T2 - fp2sqr_mont(M, M2); - fp2add(X, Y2, temp); - fp2sqr_mont(temp, temp); - fp2sub(temp, X2, temp); - fp2sub(temp, Y4, D); // D = (X + Y2)^2 - X2 - Y4 - fp2add(D, D, temp); - fp2add(temp, D, temp); - fp2add(temp, temp, temp); - fp2sub(temp, M2, F); // F = 6*D - M2 - fp2sqr_mont(F, F2); - fp2add(Y2, Y2, W); - fp2add(W, W, Wp); - fp2shl(Y4, 4, S); - fp2add(M, F, temp); - fp2sqr_mont(temp, temp); - fp2sub(temp, M2, temp); - fp2sub(temp, F2, temp); - fp2sub(temp, S, U); // U = (M + F)^2 - M2 - F2 - S - fp2sub(S, U, Up); - fp2mul_mont(X, F2, temp); - fp2mul_mont(Wp, U, Xp); - fp2sub(temp, Xp, Xp); - fp2shl(Xp, 2, Xp); // Xp = 4*(X*F2 - Wp*U) - fp2mul_mont(U, Up, temp); - fp2mul_mont(F, F2, Yp); - fp2sub(temp, Yp, Yp); - fp2mul_mont(Y, Yp, Yp); - fp2shl(Yp, 3, Yp); // Yp = 8*Y*(U*Up - F*F2) - fp2add(Z, F, temp); - fp2sqr_mont(temp, temp); - fp2sub(temp, T, temp); - fp2sub(temp, F2, Zp); // Zp = (Z + F)^2 - T - F2 - fp2sqr_mont(Zp, Tp); - fp2add(Y, Z, temp); - fp2sqr_mont(temp, temp); - fp2sub(temp, Y2, temp); - fp2sub(temp, T, temp); - fp2mul_mont(temp, T, L); - fp2add(F, F, Fp); - - fp2correction(Zp); - if (is_felm_zero(Zp[0]) && is_felm_zero(Zp[1])) { - fp2copy(one, Xp); - fp2copy(one, Yp); - } - - // Parabola function evaluation and accumulation - for (int j = 0; j < t_points; j++) { - fp2mul_mont(L, Qj[j]->Y, temp); - fp2sub(W, temp, d); - if (!is_felm_zero(Zp[0]) || !is_felm_zero(Zp[1])) { - fp2mul_mont(M, h[j], temp); - fp2add(d, temp, g); // g = (M*h + d) - fp2mul_mont(Up, h[j], temp); - fp2mul_mont(Fp, d, temp1); - fp2add(temp, temp1, temp1); - fp2mul_mont(temp1, g, g); // g = (M*h + d)*(Up*h + Fp*d) - fp2mul_mont(Wp, h[j], temp); - fp2add(F, temp, temp); - fp2_conj(temp, temp); - fp2mul_mont(temp, g, g); // g = (M*h + d)*(Up*h + Fp*d)*(Wp*h + F)^* - fp2mul_mont(Tp, Qj[j]->X, temp); - fp2sub(temp, Xp, h[j]); // h = Tp*X_{Qj} - Xp - fp2_conj(h[j], temp); - fp2mul_mont(temp, g, g); - } else { - fp2mul_mont(M, h[j], temp); - fp2add(temp, d, g); - } - fp2sqr_mont(n[j], temp); - fp2mul_mont(temp, n[j], n[j]); - fp2mul_mont(g, n[j], n[j]); - } - fp2copy(Xp, X); - fp2copy(Yp, Y); - fp2copy(Zp, Z); - fp2copy(Tp, T); - } - - // Final exponentiation: - mont_n_way_inv(n, t_points, h); - for (int j = 0; j < t_points; j++) { - fp2correction(Qj[j]->Z); - if (is_felm_zero(Qj[j]->Z[0]) && is_felm_zero(Qj[j]->Z[1])) - fp2copy(one, n[j]); - else { - final_exponentiation_3_torsion(n[j], h[j], n[j]); - } - } -} - - void Tate3_pairings(point_full_proj_t *Qj, f2elm_t* f) { felm_t *x, *y, *l1, *l2, *n1, *n2, *x2, *x23, *x2p3; diff --git a/src/compression/sidh_compressed.c b/src/compression/sidh_compressed.c index 17a4b6c..e3357d6 100644 --- a/src/compression/sidh_compressed.c +++ b/src/compression/sidh_compressed.c @@ -7,8 +7,7 @@ #include "../random/random.h" #include -#define COMPRESSION 0 -#define DECOMPRESSION 1 + static void init_basis(digit_t *gen, f2elm_t XP, f2elm_t XQ, f2elm_t XR) @@ -51,472 +50,6 @@ void random_mod_order_B(unsigned char* random_digits) } -static void Ladder3pt_dual(const point_proj_t *Rs, const digit_t* m, const unsigned int AliceOrBob, point_proj_t R, const f2elm_t A24) -{ // Project 3-point ladder - point_proj_t R0 = {0}, R2 = {0}; - digit_t mask; - int i, nbits, bit, swap, prevbit = 0; - - if (AliceOrBob == ALICE) { - nbits = OALICE_BITS; - } else { - nbits = OBOB_BITS; - } - - fp2copy(Rs[1]->X, R0->X); - fp2copy(Rs[1]->Z, R0->Z); - fp2copy(Rs[2]->X, R2->X); - fp2copy(Rs[2]->Z, R2->Z); - fp2copy(Rs[0]->X, R->X); - fp2copy(Rs[0]->Z, R->Z); - - // Main loop - for (i = 0; i < nbits; i++) { - bit = (m[i >> LOG2RADIX] >> (i & (RADIX-1))) & 1; - swap = bit ^ prevbit; - prevbit = bit; - mask = 0 - (digit_t)swap; - - swap_points(R, R2, mask); - xDBLADD(R0, R2, R->X, A24); - fp2mul_mont(R2->X, R->Z, R2->X); - } - swap = 0 ^ prevbit; - mask = 0 - (digit_t)swap; - swap_points(R, R2, mask); -} - - -static void Elligator2(const f2elm_t a24, const unsigned int r, f2elm_t x, unsigned char *bit, const unsigned char COMPorDEC) -{ // Generate an x-coordinate of a point on curve with (affine) coefficient a24 - // Use the counter r - int i; - felm_t one_fp, a2, b2, N, temp0, temp1; - f2elm_t A, y2, *t_ptr; - - fpcopy((digit_t*)&Montgomery_one, one_fp); - fp2add(a24, a24, A); - fpsub(A[0], one_fp, A[0]); - fp2add(A, A, A); // A = 4*a24-2 - - // Elligator computation - t_ptr = (f2elm_t *)&v_3_torsion[r]; - fp2mul_mont(A, (felm_t*)t_ptr, x); // x = A*v; v := 1/(1 + U*r^2) table lookup - fp2neg(x); // x = -A*v; - - if (COMPorDEC == COMPRESSION) { - fp2add(A, x, y2); // y2 = x + A - fp2mul_mont(y2, x, y2); // y2 = x*(x + A) - fpadd(y2[0], one_fp, y2[0]); // y2 = x(x + A) + 1 - fp2mul_mont(x, y2, y2); // y2 = x*(x^2 + Ax + 1); - fpsqr_mont(y2[0], a2); - fpsqr_mont(y2[1], b2); - fpadd(a2, b2, N); // N := norm(y2); - - fpcopy(N, temp0); - for (i = 0; i < OALICE_BITS - 2; i++) { - fpsqr_mont(temp0, temp0); - } - for (i = 0; i < OBOB_EXPON; i++) { - fpsqr_mont(temp0, temp1); - fpmul_mont(temp0, temp1, temp0); - } - fpsqr_mont(temp0, temp1); // z = N^((p + 1) div 4); - fpcorrection(temp1); - fpcorrection(N); - if (ct_compare((unsigned char*)temp1, (unsigned char*)N, NBITS_TO_NBYTES(NBITS_FIELD)) != 0) { - fp2neg(x); - fp2sub(x, A, x); // x = -x - A; - if (COMPorDEC == COMPRESSION) - *bit = 1; - } - } else { - if (*bit) { - fp2neg(x); - fp2sub(x,A,x); // x = -x - A; - } - } -} - - -static void make_positive(f2elm_t x) -{ - unsigned long long nbytes = NBITS_TO_NBYTES(NBITS_FIELD); - felm_t zero = {0}; - - from_fp2mont(x, x); - if (ct_compare((unsigned char*)x[0], (unsigned char*)zero, (size_t)nbytes) != 0) { - if ((x[0][0] & 1) == 1) - fp2neg(x); - } else { - if ((x[1][0] & 1) == 1) - fp2neg(x); - } - to_fp2mont(x, x); -} - - -static void BiQuad_affine(const f2elm_t a24, const f2elm_t x0, const f2elm_t x1, point_proj_t R) -{ - f2elm_t Ap2, aa, bb, cc, t0, t1; - - fp2add(a24, a24, Ap2); - fp2add(Ap2, Ap2, Ap2); // Ap2 = a+2 = 4*a24 - - fp2sub(x0, x1, aa); - fp2sqr_mont(aa, aa); - - fp2mul_mont(x0, x1, cc); - fpsub(cc[0], (digit_t*)Montgomery_one, cc[0]); - fp2sqr_mont(cc, cc); - - fpsub(x0[0], (digit_t*)Montgomery_one, bb[0]); - fpcopy(x0[1], bb[1]); - fp2sqr_mont(bb, bb); - fp2mul_mont(Ap2, x0, t0); - fp2add(bb, t0, bb); - fp2mul_mont(x1, bb, bb); - fpsub(x1[0], (digit_t*)Montgomery_one, t0[0]); - fpcopy(x1[1], t0[1]); - fp2sqr_mont(t0, t0); - fp2mul_mont(Ap2, x1, t1); - fp2add(t0, t1, t0); - fp2mul_mont(x0, t0, t0); - fp2add(bb, t0, bb); - fp2add(bb, bb, bb); - - fp2sqr_mont(bb, t0); - fp2mul_mont(aa, cc, t1); - fp2add(t1, t1, t1); - fp2add(t1, t1, t1); - fp2sub(t0, t1, t0); - sqrt_Fp2(t0, t0); - make_positive(t0); // Make the sqrt "positive" - fp2add(bb, t0, R->X); - fp2add(aa, aa, R->Z); -} - - -static void get_4_isog_dual(const point_proj_t P, f2elm_t A24, f2elm_t C24, f2elm_t* coeff) -{ - fp2sub(P->X, P->Z, coeff[1]); - fp2add(P->X, P->Z, coeff[2]); - fp2sqr_mont(P->Z, coeff[4]); - fp2add(coeff[4], coeff[4], coeff[0]); - fp2sqr_mont(coeff[0], C24); - fp2add(coeff[0], coeff[0], coeff[0]); - fp2sqr_mont(P->X, coeff[3]); - fp2add(coeff[3], coeff[3], A24); - fp2sqr_mont(A24, A24); -} - -#if (OALICE_BITS % 2 == 1) - -static void eval_dual_2_isog(const f2elm_t X2, const f2elm_t Z2, point_proj_t P) -{ - f2elm_t t0; - - fp2add(P->X, P->Z, t0); - fp2sub(P->X, P->Z, P->Z); - fp2sqr_mont(t0, t0); - fp2sqr_mont(P->Z, P->Z); - fp2sub(t0, P->Z, P->Z); - fp2mul_mont(X2, P->Z, P->Z); - fp2mul_mont(Z2, t0, P->X); -} - -#endif - -static void eval_final_dual_2_isog(point_proj_t P) -{ - f2elm_t t0, t1; - felm_t t2; - - fp2add(P->X, P->Z, t0); - fp2mul_mont(P->X, P->Z, t1); - fp2sqr_mont(t0, P->X); - fpcopy((P->X)[0], t2); - fpcopy((P->X)[1], (P->X)[0]); - fpcopy(t2, (P->X)[1]); - fpneg((P->X)[1]); - fp2add(t1, t1, P->Z); - fp2add(P->Z, P->Z, P->Z); -} - - -static void eval_dual_4_isog_shared(const f2elm_t X4pZ4, const f2elm_t X42, const f2elm_t Z42, f2elm_t* coeff) -{ - fp2sub(X42, Z42, coeff[0]); - fp2add(X42, Z42, coeff[1]); - fp2sqr_mont(X4pZ4, coeff[2]); - fp2sub(coeff[2], coeff[1], coeff[2]); -} - - -static void eval_dual_4_isog(const f2elm_t A24, const f2elm_t C24, const f2elm_t* coeff, point_proj_t P) -{ - f2elm_t t0, t1, t2, t3; - - fp2add(P->X, P->Z, t0); - fp2sub(P->X, P->Z, t1); - fp2sqr_mont(t0, t0); - fp2sqr_mont(t1, t1); - fp2sub(t0, t1, t2); - fp2sub(C24, A24, t3); - fp2mul_mont(t2, t3, t3); - fp2mul_mont(C24, t0, t2); - fp2sub(t2, t3, t2); - fp2mul_mont(t2, t0, P->X); - fp2mul_mont(t3, t1, P->Z); - fp2mul_mont(coeff[0], P->X, P->X); - fp2mul_mont(coeff[1], P->Z, t0); - fp2add(P->X, t0, P->X); - fp2mul_mont(coeff[2], P->Z, P->Z); -} - - -static void eval_full_dual_4_isog(const f2elm_t As[][5], point_proj_t P) -{ - // First all 4-isogenies - for(unsigned int i = 0; i < MAX_Alice; i++) { - eval_dual_4_isog(As[MAX_Alice-i][0], As[MAX_Alice-i][1], *(As+MAX_Alice-i-1)+2, P); - } -#if (OALICE_BITS % 2 == 1) - eval_dual_2_isog(As[MAX_Alice][2], As[MAX_Alice][3], P); -#endif - eval_final_dual_2_isog(P); // to A = 0 -} - - -static void TripleAndParabola_proj(const point_full_proj_t R, f2elm_t l1x, f2elm_t l1z) -{ - fp2sqr_mont(R->X, l1z); - fp2add(l1z, l1z, l1x); - fp2add(l1x, l1z, l1x); - fpadd(l1x[0], (digit_t*)&Montgomery_one, l1x[0]); - fp2add(R->Y, R->Y, l1z); -} - - -static void Tate3_proj(const point_full_proj_t P, const point_full_proj_t Q, f2elm_t gX, f2elm_t gZ) -{ - f2elm_t t0, l1x; - - TripleAndParabola_proj(P, l1x, gZ); - fp2sub(Q->X, P->X, gX); - fp2mul_mont(l1x, gX, gX); - fp2sub(P->Y, Q->Y, t0); - fp2mul_mont(gZ, t0, t0); - fp2add(gX, t0, gX); -} - - -static void FinalExpo3(f2elm_t gX, f2elm_t gZ) -{ - unsigned int i; - f2elm_t f_; - - fp2copy(gZ, f_); - fpneg(f_[1]); - fp2mul_mont(gX, f_, f_); - fp2inv_mont_bingcd(f_); - fpneg(gX[1]); - fp2mul_mont(gX,gZ, gX); - fp2mul_mont(gX,f_, gX); - for(i = 0; i < OALICE_BITS; i++) - fp2sqr_mont(gX, gX); - for(i = 0; i < OBOB_EXPON-1; i++) - cube_Fp2_cycl(gX, (digit_t*)Montgomery_one); -} - - -static void FinalExpo3_2way(f2elm_t *gX, f2elm_t *gZ) -{ - unsigned int i, j; - f2elm_t f_[2], finv[2]; - - for(i = 0; i < 2; i++) { - fp2copy(gZ[i], f_[i]); - fpneg(f_[i][1]); // Conjugate - fp2mul_mont(gX[i], f_[i], f_[i]); - } - mont_n_way_inv(f_,2,finv); - for(i = 0; i < 2; i++) { - fpneg(gX[i][1]); - fp2mul_mont(gX[i], gZ[i], gX[i]); - fp2mul_mont(gX[i], finv[i], gX[i]); - for(j = 0; j < OALICE_BITS; j++) - fp2sqr_mont(gX[i], gX[i]); - for(j = 0; j < OBOB_EXPON-1; j++) - cube_Fp2_cycl(gX[i], (digit_t*)&Montgomery_one); - } -} - - -static bool FirstPoint_dual(const point_proj_t P, point_full_proj_t R, unsigned char *ind) -{ - point_full_proj_t R3,S3; - f2elm_t gX[2],gZ[2]; - felm_t zero = {0}; - unsigned long long nbytes = NBITS_TO_NBYTES(NBITS_FIELD); - unsigned char alpha,beta; - - fpcopy((digit_t*)B_gen_3_tors + 0*NWORDS_FIELD, (R3->X)[0]); - fpcopy((digit_t*)B_gen_3_tors + 1*NWORDS_FIELD, (R3->X)[1]); - fpcopy((digit_t*)B_gen_3_tors + 2*NWORDS_FIELD, (R3->Y)[0]); - fpcopy((digit_t*)B_gen_3_tors + 3*NWORDS_FIELD, (R3->Y)[1]); - fpcopy((digit_t*)B_gen_3_tors + 4*NWORDS_FIELD, (S3->X)[0]); - fpcopy((digit_t*)B_gen_3_tors + 5*NWORDS_FIELD, (S3->X)[1]); - fpcopy((digit_t*)B_gen_3_tors + 6*NWORDS_FIELD, (S3->Y)[0]); - fpcopy((digit_t*)B_gen_3_tors + 7*NWORDS_FIELD, (S3->Y)[1]); - - CompletePoint(P,R); - Tate3_proj(R3,R,gX[0],gZ[0]); - Tate3_proj(S3,R,gX[1],gZ[1]); - FinalExpo3_2way(gX,gZ); - - // Do small DLog with respect to g_R3_S3 - fp2correction(gX[0]); - fp2correction(gX[1]); - if (ct_compare((unsigned char*)gX[0][1], (unsigned char*)zero, (size_t)nbytes) == 0) // = 1 - alpha = 0; - else if (ct_compare((unsigned char*)gX[0][1], (unsigned char*)g_R_S_im, (size_t)nbytes) == 0) // = g_R3_S3 - alpha = 1; - else // = g_R3_S3^2 - alpha = 2; - - if (ct_compare((unsigned char*)gX[1][1], (unsigned char*)zero, (size_t)nbytes) == 0) // = 1 - beta = 0; - else if (ct_compare((unsigned char*)gX[1][1], (unsigned char*)g_R_S_im, (size_t)nbytes) == 0) // = g_R3_S3 - beta = 1; - else // = g_R3_S3^2 - beta = 2; - - if (alpha == 0 && beta == 0) // Not full order - return false; - - // Return the 3-torsion point that R lies above - if (alpha == 0) // Lies above R3 - *ind = 0; - else if (beta == 0) // Lies above S3 - *ind = 1; - else if (alpha + beta == 3) // Lies above R3+S3 - *ind = 3; - else // Lies above R3-S3 - *ind = 2; - - return true; -} - - -static bool SecondPoint_dual(const point_proj_t P, point_full_proj_t R, unsigned char ind) -{ - point_full_proj_t RS3; - f2elm_t gX, gZ; - felm_t zero = {0}; - unsigned long long nbytes = NBITS_TO_NBYTES(NBITS_FIELD); - - // Pair with 3-torsion point determined by first point - fpcopy((digit_t*)B_gen_3_tors + (4*ind + 0)*NWORDS_FIELD, (RS3->X)[0]); - fpcopy((digit_t*)B_gen_3_tors + (4*ind + 1)*NWORDS_FIELD, (RS3->X)[1]); - fpcopy((digit_t*)B_gen_3_tors + (4*ind + 2)*NWORDS_FIELD, (RS3->Y)[0]); - fpcopy((digit_t*)B_gen_3_tors + (4*ind + 3)*NWORDS_FIELD, (RS3->Y)[1]); - - CompletePoint(P, R); - Tate3_proj(RS3, R, gX, gZ); - FinalExpo3(gX, gZ); - - fp2correction(gX); - if (ct_compare((unsigned char*)gX[1], (unsigned char*)zero, (size_t)nbytes) != 0) // Not equal to 1 - return true; - else - return false; -} - - -static void FirstPoint3n(const f2elm_t a24, const f2elm_t As[][5], f2elm_t x, point_full_proj_t R, unsigned int *r, unsigned char *ind, unsigned char *bitEll) -{ - bool b = false; - point_proj_t P; - felm_t zero = {0}; - *r = 0; - - while (!b) { - *bitEll = 0; - Elligator2(a24, *r, x, bitEll, COMPRESSION); // Get x-coordinate on curve a24 - - fp2copy(x, P->X); - fpcopy((digit_t*)&Montgomery_one, (P->Z)[0]); - fpcopy(zero, (P->Z)[1]); - eval_full_dual_4_isog(As, P); // Move x over to A = 0 - - b = FirstPoint_dual(P, R, ind); // Compute DLog with 3-torsion points - *r = *r + 1; - } -} - - -static void SecondPoint3n(const f2elm_t a24, const f2elm_t As[][5], f2elm_t x, point_full_proj_t R, unsigned int *r, unsigned char ind, unsigned char *bitEll) -{ - bool b = false; - point_proj_t P; - felm_t zero = {0}; - - while (!b) { - *bitEll = 0; - Elligator2(a24, *r, x, bitEll, COMPRESSION); - - fp2copy(x, P->X); - fpcopy((digit_t*)&Montgomery_one, (P->Z)[0]); - fpcopy(zero, (P->Z)[1]); - eval_full_dual_4_isog(As, P); // Move x over to A = 0 - - b = SecondPoint_dual(P, R, ind); - *r = *r + 1; - } -} - - -static void makeDiff(const point_full_proj_t R, point_full_proj_t S, const point_proj_t D) -{ - f2elm_t t0, t1, t2; - unsigned long long nbytes = NBITS_TO_NBYTES(NBITS_FIELD); - - fp2sub(R->X, S->X, t0); - fp2sub(R->Y, S->Y, t1); - fp2sqr_mont(t0, t0); - fp2sqr_mont(t1, t1); - fp2add(R->X, S->X, t2); - fp2mul_mont(t0, t2, t2); - fp2sub(t1, t2, t1); - fp2mul_mont(D->Z, t1, t1); - fp2mul_mont(D->X, t0, t0); - fp2correction(t0); - fp2correction(t1); - if (ct_compare((unsigned char*)t0[0], (unsigned char*)t1[0], (size_t)nbytes) == 0 && ct_compare((unsigned char*)t0[1], (unsigned char*)t1[1], (size_t)nbytes) == 0) - fp2neg(S->Y); -} - - -static void BuildOrdinary3nBasis_dual(const f2elm_t a24, const f2elm_t As[][5], point_full_proj_t *R, unsigned int *r, unsigned int *bitsEll) -{ - point_proj_t D; - f2elm_t xs[2]; - unsigned char ind, bit; - - FirstPoint3n(a24, As, xs[0], R[0], r, &ind, &bit); - *bitsEll = (unsigned int)bit; - *(r+1) = *r; - SecondPoint3n(a24, As, xs[1], R[1], r+1, ind, &bit); - *bitsEll |= ((unsigned int)bit << 1); - - // Get x-coordinate of difference - BiQuad_affine(a24, xs[0], xs[1], D); - eval_full_dual_4_isog(As, D); // Move x over to A = 0 - makeDiff(R[0], R[1], D); -} - - static void FullIsogeny_A_dual(unsigned char* PrivateKeyA, f2elm_t As[][5], f2elm_t a24, unsigned int sike) { // Input: a private key PrivateKeyA in the range [0, 2^eA - 1]. @@ -604,21 +137,6 @@ static void Dlogs3_dual(const f2elm_t *f, int *D, digit_t *d0, digit_t *c0, digi } -static void BuildOrdinary3nBasis_Decomp_dual(const f2elm_t A24, point_proj_t *Rs, unsigned char *r, const unsigned char bitsEll) -{ - unsigned char bitEll[2]; - - bitEll[0] = bitsEll & 0x1; - bitEll[1] = (bitsEll >> 1) & 0x1; - - // Elligator2 both x-coordinates - Elligator2(A24, (unsigned int)r[0]-1, Rs[0]->X, &bitEll[0], DECOMPRESSION); - Elligator2(A24, (unsigned int)r[1]-1, Rs[1]->X, &bitEll[1], DECOMPRESSION); - // Get x-coordinate of difference - BiQuad_affine(A24, Rs[0]->X, Rs[1]->X, Rs[2]); -} - - static void PKADecompression_dual(const unsigned char* SecretKeyB, const unsigned char* CompressedPKA, point_proj_t R, f2elm_t A) { unsigned char bit, rs[3]; @@ -820,120 +338,6 @@ int EphemeralSecretAgreement_B(const unsigned char* PrivateKeyB, const unsigned } -static void BuildEntangledXonly(const f2elm_t A, point_proj_t *R, unsigned char *qnr, unsigned char *ind) -{ - felm_t s; - f2elm_t *t_ptr, r, t; - - // Select the correct table - if (is_sqr_fp2(A, s)) { - t_ptr = (f2elm_t *)table_v_qnr; - *qnr = 1; - } else { - t_ptr = (f2elm_t *)table_v_qr; - *qnr = 0; - } - - // Get x0 - *ind = 0; - do { - fp2mul_mont(A, (felm_t *)*t_ptr++, R[0]->X); // R[0]->X = A*v - fp2neg(R[0]->X); // R[0]->X = -A*v - fp2add(R[0]->X, A, t); - fp2mul_mont(R[0]->X, t, t); - fpadd(t[0], (digit_t*)Montgomery_one, t[0]); - fp2mul_mont(R[0]->X, t, t); // t = R[0]->X^3 + A*R[0]->X^2 + R[0]->X - *ind += 1; - } while (!is_sqr_fp2(t, s)); - *ind -= 1; - - if (*qnr) - fpcopy((digit_t*)table_r_qnr[*ind], r[0]); - else - fpcopy((digit_t*)table_r_qr[*ind], r[0]); - - // Get x1 - fp2add(R[0]->X, A, R[1]->X); - fp2neg(R[1]->X); // R[1]->X = -R[0]->X-A - - // Get difference x2, z2 - fp2sub(R[0]->X, R[1]->X, R[2]->Z); - fp2sqr_mont(R[2]->Z, R[2]->Z); - - fpcopy(r[0], r[1]); // (1+i)*ind - fpadd((digit_t*)Montgomery_one, r[0], r[0]); - fp2sqr_mont(r, r); - fp2mul_mont(t, r, R[2]->X); -} - - -static void RecoverY(const f2elm_t A, const point_proj_t *xs, point_full_proj_t *Rs) -{ - f2elm_t t0, t1, t2, t3, t4; - - fp2mul_mont(xs[2]->X, xs[1]->Z, t0); - fp2mul_mont(xs[1]->X, xs[2]->Z, t1); - fp2mul_mont(xs[1]->X, xs[2]->X, t2); - fp2mul_mont(xs[1]->Z, xs[2]->Z, t3); - fp2sqr_mont(xs[1]->X, t4); - fp2sqr_mont(xs[1]->Z, Rs[1]->X); - fp2sub(t2, t3, Rs[1]->Y); - fp2mul_mont(xs[1]->X, Rs[1]->Y, Rs[1]->Y); - fp2add(t4, Rs[1]->X, t4); - fp2mul_mont(xs[2]->Z, t4, t4); - fp2mul_mont(A, t1, Rs[1]->X); - fp2sub(t0, t1, Rs[1]->Z); - - fp2mul_mont(Rs[0]->X, Rs[1]->Z, t0); - fp2add(t2, Rs[1]->X, t1); - fp2add(t1, t1, t1); - fp2sub(t0, t1, t0); - fp2mul_mont(xs[1]->Z, t0, t0); - fp2sub(t0, t4, t0); - fp2mul_mont(Rs[0]->X, t0, t0); - fp2add(t0, Rs[1]->Y, Rs[1]->Y); - fp2mul_mont(Rs[0]->Y, t3, t0); - fp2mul_mont(xs[1]->X, t0, Rs[1]->X); - fp2add(Rs[1]->X, Rs[1]->X, Rs[1]->X); - fp2mul_mont(xs[1]->Z, t0, Rs[1]->Z); - fp2add(Rs[1]->Z, Rs[1]->Z, Rs[1]->Z); - - fp2inv_mont_bingcd(Rs[1]->Z); - fp2mul_mont(Rs[1]->X, Rs[1]->Z, Rs[1]->X); - fp2mul_mont(Rs[1]->Y, Rs[1]->Z, Rs[1]->Y); -} - - -static void BuildOrdinary2nBasis_dual(const f2elm_t A, const f2elm_t Ds[][2], point_full_proj_t *Rs, unsigned char *qnr, unsigned char *ind) -{ - unsigned int i; - felm_t t0; - f2elm_t A6 = {0}; - point_proj_t xs[3] = {0}; - - // Generate x-only entangled basis - BuildEntangledXonly(A, xs, qnr, ind); - fpcopy((digit_t*)Montgomery_one, (xs[0]->Z)[0]); - fpcopy((digit_t*)Montgomery_one, (xs[1]->Z)[0]); - - // Move them back to A = 6 - for(i = 0; i < MAX_Bob; i++) { - eval_3_isog(xs[0], Ds[MAX_Bob-1-i]); - eval_3_isog(xs[1], Ds[MAX_Bob-1-i]); - eval_3_isog(xs[2], Ds[MAX_Bob-1-i]); - } - - // Recover y-coordinates with a single sqrt on A = 6 - fpcopy((digit_t*)Montgomery_one, A6[0]); - fpadd(A6[0], A6[0], t0); - fpadd(t0, t0, A6[0]); - fpadd(A6[0], t0, A6[0]); // A6 = 6 - - CompleteMPoint(A6, xs[0], Rs[0]); - RecoverY(A6, xs, Rs); -} - - static void FullIsogeny_B_dual(const unsigned char* PrivateKeyB, f2elm_t Ds[][2], f2elm_t A) { // Bob's ephemeral public key generation // Input: a private key PrivateKeyB in the range [0, 2^Floor(Log(2,oB)) - 1]. @@ -1008,46 +412,6 @@ static void Dlogs2_dual(const f2elm_t *f, int *D, digit_t *d0, digit_t *c0, digi } -static void BuildEntangledXonly_Decomp(const f2elm_t A, point_proj_t *R, unsigned char qnr, unsigned char ind) -{ - f2elm_t *t_ptr, r, t; - - // Select the correct table - if ( qnr == 1 ) - t_ptr = (f2elm_t *)table_v_qnr; - else - t_ptr = (f2elm_t *)table_v_qr; - - if (ind >= TABLE_V_LEN/2) - ind = 0; - // Get x0 - fp2mul_mont(A, t_ptr[ind], R[0]->X); // x1 = A*v - fp2neg(R[0]->X); // R[0]->X = -A*v - fp2add(R[0]->X, A, t); - fp2mul_mont(R[0]->X, t, t); - fpadd(t[0], (digit_t*)Montgomery_one, t[0]); - fp2mul_mont(R[0]->X, t, t); // t = R[0]->X^3 + A*R[0]->X^2 + R[0]->X - - if (qnr == 1) - fpcopy((digit_t*)table_r_qnr[ind], r[0]); - else - fpcopy((digit_t*)table_r_qr[ind], r[0]); - - // Get x1 - fp2add(R[0]->X, A, R[1]->X); - fp2neg(R[1]->X); // R[1]->X = -R[0]->X-A - - // Get difference x2,z2 - fp2sub(R[0]->X, R[1]->X, R[2]->Z); - fp2sqr_mont(R[2]->Z, R[2]->Z); - - fpcopy(r[0],r[1]); // (1+i)*ind - fpadd((digit_t*)Montgomery_one, r[0], r[0]); - fp2sqr_mont(r, r); - fp2mul_mont(t, r, R[2]->X); -} - - static void PKBDecompression_extended(const unsigned char* SecretKeyA, const unsigned char* CompressedPKB, point_proj_t R, f2elm_t A, unsigned char* tphiBKA_t) { // Bob's PK decompression -- SIKE protocol uint64_t mask = (digit_t)(-1); @@ -1377,20 +741,19 @@ int8_t validate_ciphertext(const unsigned char* ephemeralsk_, const unsigned cha { // If ct validation passes returns 0, otherwise returns -1. point_proj_t phis[3] = {0}, R, S, pts[MAX_INT_POINTS_BOB]; f2elm_t XPB, XQB, XRB, coeff[3], A24plus = {0}, A24minus = {0}, A = {0}, comp1 = {0}, comp2 = {0}, one = {0}; + digit_t temp[NWORDS_ORDER] = {0}, sk[NWORDS_ORDER] = {0}; unsigned int i, row, m, index = 0, pts_index[MAX_INT_POINTS_BOB], npts = 0, ii = 0; - digit_t temp[NWORDS_ORDER] = {0}; - digit_t sk[NWORDS_ORDER] = {0}; - + fpcopy((digit_t*)&Montgomery_one, one[0]); // Initialize basis points init_basis((digit_t*)B_gen, XPB, XQB, XRB); fp2_decode(xKA, phis[0]->X); - fpcopy((digit_t*)&Montgomery_one, phis[0]->Z[0]); // phi[0] <- PA + skA*QA + fpcopy(one[0], phis[0]->Z[0]); // phis[0] <- PA + skA*QA // Initialize constants: A24minus = A-2C, A24plus = A+2C, where A=6, C=1 - fpcopy((digit_t*)&Montgomery_one, A24plus[0]); + fpcopy(one[0], A24plus[0]); fp2add(A24plus, A24plus, A24plus); fp2add(A24plus, A24plus, A24minus); // A24minus = 4 fp2add(A24plus, A24minus, A); // A = 6 @@ -1432,7 +795,7 @@ int8_t validate_ciphertext(const unsigned char* ephemeralsk_, const unsigned cha fp2_decode(tphiBKA_t, S->X); fp2_decode(&tphiBKA_t[FP2_ENCODED_BYTES], S->Z); // Recover t*3^n*((a0+skA*a1)*S1 + (b0+skA*b1)*S2) decode_to_digits(&tphiBKA_t[2*FP2_ENCODED_BYTES], temp, ORDER_A_ENCODED_BYTES, NWORDS_ORDER); - Ladder(phis[0], temp, A, OALICE_BITS, R); // t*(phiP + skA*phiQ) + Ladder(phis[0], temp, A, OALICE_BITS, R); // R <- t*(phiP + skA*phiQ), t in {(a0+skA*a1)^-1, (b0+skA*b1)^-1} fp2mul_mont(R->X, S->Z, comp1); fp2mul_mont(R->Z, S->X, comp2); diff --git a/src/compression/torsion_basis.c b/src/compression/torsion_basis.c index c39d9d7..af54ef8 100644 --- a/src/compression/torsion_basis.c +++ b/src/compression/torsion_basis.c @@ -5,283 +5,479 @@ *********************************************************************************************/ -void get_2_torsion_entangled_basis_compression(const f2elm_t A, point_t S1, point_t S2, unsigned char *bit, unsigned char *entry) -{ // Build an entangled basis for E[2^m]. - // At first glance this is similar to the Elligator 2 technique, but the field element u is a *square* here, not a non-square. - unsigned int i, index, isSqrA = 0; - felm_t r = {0}, s, z, alpha, twoalphainv, beta; - f2elm_t t, u, u0, one = {0}, *t_ptr; - felm_t *x1 = (felm_t*) S1->x, *y1 = (felm_t*) S1->y, *x2 = (felm_t*) S2->x, *y2 = (felm_t*) S2->y; - - fpcopy((digit_t*)&Montgomery_one, one[0]); - copy_words((const digit_t *)u0_entang, u0[0], 2*NWORDS_FIELD); - copy_words((const digit_t *)u_entang, u[0], 2*NWORDS_FIELD); +#define COMPRESSION 0 +#define DECOMPRESSION 1 - // Select the correct table - if (is_sqr_fp2(A, s)) { - t_ptr = (f2elm_t *)table_v_qnr; - isSqrA = 1; - } else { - t_ptr = (f2elm_t *)table_v_qr; - } - *bit = (unsigned char)isSqrA; // This bit is transmitted along with the PK to speedup decompression - index = 0; - do { - fp2mul_mont(A, (felm_t *)*t_ptr++, x1); // x1 = A*v - fp2neg(x1); // x1 = -A*v - fp2add(x1, A, t); - fp2mul_mont(x1, t, t); - fpadd(t[0], one[0], t[0]); - fp2mul_mont(x1, t, t); // t = x1^3 + A*x1^2 + x1 = x1(x1(x1 + A) + 1) - index += 2; - } while (!is_sqr_fp2(t, s)); - *entry = ((unsigned char)index - 2)/2; // This table entry will also be transmitted along with the PubKey to speedup decompression - - if (isSqrA) - copy_words((const digit_t *)table_r_qnr[(index-2)/2], r, NWORDS_FIELD); - else - copy_words((const digit_t *)table_r_qr[(index-2)/2], r, NWORDS_FIELD); +static void Elligator2(const f2elm_t a24, const unsigned int r, f2elm_t x, unsigned char *bit, const unsigned char COMPorDEC) +{ // Generate an x-coordinate of a point on curve with (affine) coefficient a24 + // Use the counter r + int i; + felm_t one_fp, a2, b2, N, temp0, temp1; + f2elm_t A, y2, *t_ptr; + + fpcopy((digit_t*)&Montgomery_one, one_fp); + fp2add(a24, a24, A); + fpsub(A[0], one_fp, A[0]); + fp2add(A, A, A); // A = 4*a24-2 + + // Elligator computation + t_ptr = (f2elm_t *)&v_3_torsion[r]; + fp2mul_mont(A, (felm_t*)t_ptr, x); // x = A*v; v := 1/(1 + U*r^2) table lookup + fp2neg(x); // x = -A*v; - // Finish sqrt computation for y1 = sqrt(x1^3+A*x1^2+x1) - fpadd(t[0],s,z); - fpdiv2(z,z); - fpcopy(z,alpha); - for (i = 0; i < OALICE_BITS - 2; i++) { - fpsqr_mont(alpha, alpha); - } - for (i = 0; i < OBOB_EXPON; i++) { - fpsqr_mont(alpha, s); - fpmul_mont(alpha, s, alpha); // alpha = z^((p+1)/4) - } + if (COMPorDEC == COMPRESSION) { + fp2add(A, x, y2); // y2 = x + A + fp2mul_mont(y2, x, y2); // y2 = x*(x + A) + fpadd(y2[0], one_fp, y2[0]); // y2 = x(x + A) + 1 + fp2mul_mont(x, y2, y2); // y2 = x*(x^2 + Ax + 1); + fpsqr_mont(y2[0], a2); + fpsqr_mont(y2[1], b2); + fpadd(a2, b2, N); // N := norm(y2); - fpadd(alpha,alpha,twoalphainv); - fpinv_mont_bingcd(twoalphainv); - fpmul_mont(t[1],twoalphainv,beta); - fpsqr_mont(alpha, twoalphainv); - fpcorrection(twoalphainv); - fpcorrection(z); - if (ct_compare((unsigned char*)twoalphainv, (unsigned char*)z, NBITS_TO_NBYTES(NBITS_FIELD)) == 0) { - fpcopy(alpha,y1[0]); - fpcopy(beta,y1[1]); + fpcopy(N, temp0); + for (i = 0; i < OALICE_BITS - 2; i++) { + fpsqr_mont(temp0, temp0); + } + for (i = 0; i < OBOB_EXPON; i++) { + fpsqr_mont(temp0, temp1); + fpmul_mont(temp0, temp1, temp0); + } + fpsqr_mont(temp0, temp1); // z = N^((p + 1) div 4); + fpcorrection(temp1); + fpcorrection(N); + if (memcmp(temp1, N, NBITS_TO_NBYTES(NBITS_FIELD)) != 0) { + fp2neg(x); + fp2sub(x, A, x); // x = -x - A; + if (COMPorDEC == COMPRESSION) + *bit = 1; + } } else { - fpneg(beta); - fpcopy(beta,y1[0]); - fpneg(alpha); - fpcopy(alpha,y1[1]); - } - fp2add(x1, A, x2); - fp2neg(x2); // x2 = A*v - A - fp2mul_mont(u0,y1,y2); - fpmul_mont(r,y2[0],y2[0]); - fpmul_mont(r,y2[1],y2[1]); // y2 = u0*r*y1 + if (*bit) { + fp2neg(x); + fp2sub(x,A,x); // x = -x - A; + } + } } -void get_2_torsion_entangled_basis_decompression(const f2elm_t A, point_t S1, point_t S2, unsigned char isASqr, unsigned char entry) -{ // Build an entangled basis for E[2^m] during decompression using the the entry and table already computed during compression - unsigned int i; - felm_t r = {0}, s, z, alpha, twoalphainv, beta; - f2elm_t t, u, u0, one = {0}, *t_ptr; - felm_t *x1 = (felm_t*) S1->x, *y1 = (felm_t*) S1->y, *x2 = (felm_t*) S2->x, *y2 = (felm_t*) S2->y; - - fpcopy((digit_t*)&Montgomery_one, one[0]); - - copy_words((const digit_t *)u0_entang, u0[0], 2*NWORDS_FIELD); - copy_words((const digit_t *)u_entang, u[0], 2*NWORDS_FIELD); - - // Select the table - t_ptr = isASqr ? (f2elm_t *)table_v_qnr : (f2elm_t *)table_v_qr; - - fp2mul_mont(A, t_ptr[entry], x1); // x1 = A*v - fp2neg(x1); // x1 = -A*v - fp2add(x1, A, t); - fp2mul_mont(x1, t, t); - fpadd(t[0], one[0], t[0]); - fp2mul_mont(x1, t, t); // t = x1^3 + A*x1^2 + x1 = x1(x1(x1 + A) + 1) - is_sqr_fp2(t, s); - - if (isASqr) - copy_words((const digit_t *)table_r_qnr[entry], r, NWORDS_FIELD); - else - copy_words((const digit_t *)table_r_qr[entry], r, NWORDS_FIELD); - - // Finish sqrt computation for y1 = sqrt(x1^3+A*x1^2+x1) - fpadd(t[0],s,z); - fpdiv2(z,z); - fpcopy(z,alpha); - for (i = 0; i < OALICE_BITS - 2; i++) { - fpsqr_mont(alpha, alpha); +static void TripleAndParabola_proj(const point_full_proj_t R, f2elm_t l1x, f2elm_t l1z) +{ + fp2sqr_mont(R->X, l1z); + fp2add(l1z, l1z, l1x); + fp2add(l1x, l1z, l1x); + fpadd(l1x[0], (digit_t*)&Montgomery_one, l1x[0]); + fp2add(R->Y, R->Y, l1z); +} + + +static void Tate3_proj(const point_full_proj_t P, const point_full_proj_t Q, f2elm_t gX, f2elm_t gZ) +{ + f2elm_t t0, l1x; + + TripleAndParabola_proj(P, l1x, gZ); + fp2sub(Q->X, P->X, gX); + fp2mul_mont(l1x, gX, gX); + fp2sub(P->Y, Q->Y, t0); + fp2mul_mont(gZ, t0, t0); + fp2add(gX, t0, gX); +} + + +static void FinalExpo3_2way(f2elm_t *gX, f2elm_t *gZ) +{ + unsigned int i, j; + f2elm_t f_[2], finv[2]; + + for(i = 0; i < 2; i++) { + fp2copy(gZ[i], f_[i]); + fpneg(f_[i][1]); // Conjugate + fp2mul_mont(gX[i], f_[i], f_[i]); } - for (i = 0; i < OBOB_EXPON; i++) { - fpsqr_mont(alpha, s); - fpmul_mont(alpha, s, alpha); // alpha = z^((p+1)/4) + mont_n_way_inv(f_,2,finv); + for(i = 0; i < 2; i++) { + fpneg(gX[i][1]); + fp2mul_mont(gX[i], gZ[i], gX[i]); + fp2mul_mont(gX[i], finv[i], gX[i]); + for(j = 0; j < OALICE_BITS; j++) + fp2sqr_mont(gX[i], gX[i]); + for(j = 0; j < OBOB_EXPON-1; j++) + cube_Fp2_cycl(gX[i], (digit_t*)&Montgomery_one); } +} + + +static void FinalExpo3(f2elm_t gX, f2elm_t gZ) +{ + unsigned int i; + f2elm_t f_; + + fp2copy(gZ, f_); + fpneg(f_[1]); + fp2mul_mont(gX, f_, f_); + fp2inv_mont_bingcd(f_); + fpneg(gX[1]); + fp2mul_mont(gX,gZ, gX); + fp2mul_mont(gX,f_, gX); + for(i = 0; i < OALICE_BITS; i++) + fp2sqr_mont(gX, gX); + for(i = 0; i < OBOB_EXPON-1; i++) + cube_Fp2_cycl(gX, (digit_t*)Montgomery_one); +} + + +static void make_positive(f2elm_t x) +{ + unsigned long long nbytes = NBITS_TO_NBYTES(NBITS_FIELD); + felm_t zero = {0}; - fpadd(alpha,alpha,twoalphainv); - fpinv_mont_bingcd(twoalphainv); - fpmul_mont(t[1],twoalphainv,beta); - fpsqr_mont(alpha, twoalphainv); - fpcorrection(twoalphainv); - fpcorrection(z); - if (ct_compare((unsigned char*)twoalphainv, (unsigned char*)z, NBITS_TO_NBYTES(NBITS_FIELD)) == 0) { - fpcopy(alpha,y1[0]); - fpcopy(beta,y1[1]); + from_fp2mont(x, x); + if (memcmp(x[0], zero, (size_t)nbytes) != 0) { + if ((x[0][0] & 1) == 1) + fp2neg(x); } else { - fpneg(beta); - fpcopy(beta,y1[0]); - fpneg(alpha); - fpcopy(alpha,y1[1]); + if ((x[1][0] & 1) == 1) + fp2neg(x); } - fp2add(x1, A, x2); - fp2neg(x2); // x2 = A*v - A - fp2mul_mont(u0,y1,y2); - fpmul_mont(r,y2[0],y2[0]); - fpmul_mont(r,y2[1],y2[1]); // y2 = u0*r*y1 + to_fp2mont(x, x); } -void BasePoint3n(f2elm_t A, unsigned int *r, point_proj_t P, point_proj_t Q) -{ // xz-only construction of a point of order 3^n in the Montgomery curve y^2 = x^3 + A*x^2 + x from base counter r. - // This is essentially the Elligator 2 technique coupled with cofactor multiplication and LI checking. - int i; - felm_t a2, b2, N, temp0, temp1; - f2elm_t A2, A24, two = {0}, x, y2, one_fp2 = {0}, *t_ptr; - point_proj_t S; - - fpcopy((digit_t*)&Montgomery_one, one_fp2[0]); - fp2div2(A,A2); - fpcopy(one_fp2[0],two[0]); - fpadd(two[0], two[0], two[0]); - fp2add(A,two,A24); - fp2div2(A24,A24); - fp2div2(A24,A24); - - t_ptr = (f2elm_t *)&v_3_torsion[*r]; - do { - *r += 1; - fp2mul_mont(A,(felm_t*)t_ptr++,x); // x = A*v; v := 1/(1 + U*r^2) table lookup - fp2neg(x); // x = -A*v; - fp2add(A,x,y2); // y2 = x + A - fp2mul_mont(y2, x, y2); // y2 = x*(x + A) - fpadd(y2[0], one_fp2[0], y2[0]); // y2 = x(x + A) + 1 - fp2mul_mont(x,y2,y2); // y2 = x*(x^2 + Ax + 1); - fpsqr_mont(y2[0],a2); - fpsqr_mont(y2[1],b2); - fpadd(a2,b2,N); // N := norm(y2); - - fpcopy(N,temp0); - for (i = 0; i < OALICE_BITS - 2; i++) { - fpsqr_mont(temp0, temp0); - } - for (i = 0; i < OBOB_EXPON; i++) { - fpsqr_mont(temp0, temp1); - fpmul_mont(temp0, temp1, temp0); - } - fpsqr_mont(temp0,temp1); // z = N^((p + 1) div 4); - fpcorrection(temp1); - fpcorrection(N); - if (ct_compare((unsigned char*)temp1,(unsigned char*)N,NBITS_TO_NBYTES(NBITS_FIELD)) != 0) { - fp2neg(x); - fp2sub(x,A,x); // x = -x - A; - } - fp2copy(x,S->X); - fp2copy(one_fp2,S->Z); - Double(S,P,A24,OALICE_BITS); // x, z := Double(A24, x, 1, eA); - xTPLe_fast(P,Q,A2,OBOB_EXPON-1); // t, w := Triple(A_2, x, z, eB-1); - fp2correction(Q->X); - fp2correction(Q->Z); - } while (is_felm_zero(Q->Z[0]) && is_felm_zero(Q->Z[1])); +static bool FirstPoint_dual(const point_proj_t P, point_full_proj_t R, unsigned char *ind) +{ + point_full_proj_t R3,S3; + f2elm_t gX[2],gZ[2]; + felm_t zero = {0}; + unsigned long long nbytes = NBITS_TO_NBYTES(NBITS_FIELD); + unsigned char alpha,beta; + + fpcopy((digit_t*)B_gen_3_tors + 0*NWORDS_FIELD, (R3->X)[0]); + fpcopy((digit_t*)B_gen_3_tors + 1*NWORDS_FIELD, (R3->X)[1]); + fpcopy((digit_t*)B_gen_3_tors + 2*NWORDS_FIELD, (R3->Y)[0]); + fpcopy((digit_t*)B_gen_3_tors + 3*NWORDS_FIELD, (R3->Y)[1]); + fpcopy((digit_t*)B_gen_3_tors + 4*NWORDS_FIELD, (S3->X)[0]); + fpcopy((digit_t*)B_gen_3_tors + 5*NWORDS_FIELD, (S3->X)[1]); + fpcopy((digit_t*)B_gen_3_tors + 6*NWORDS_FIELD, (S3->Y)[0]); + fpcopy((digit_t*)B_gen_3_tors + 7*NWORDS_FIELD, (S3->Y)[1]); + + CompletePoint(P,R); + Tate3_proj(R3,R,gX[0],gZ[0]); + Tate3_proj(S3,R,gX[1],gZ[1]); + FinalExpo3_2way(gX,gZ); + + // Do small DLog with respect to g_R3_S3 + fp2correction(gX[0]); + fp2correction(gX[1]); + if (memcmp(gX[0][1], zero, (size_t)nbytes) == 0) // = 1 + alpha = 0; + else if (memcmp(gX[0][1], g_R_S_im, (size_t)nbytes) == 0) // = g_R3_S3 + alpha = 1; + else // = g_R3_S3^2 + alpha = 2; + + if (memcmp(gX[1][1], zero, (size_t)nbytes) == 0) // = 1 + beta = 0; + else if (memcmp(gX[1][1], g_R_S_im, (size_t)nbytes) == 0) // = g_R3_S3 + beta = 1; + else // = g_R3_S3^2 + beta = 2; + + if (alpha == 0 && beta == 0) // Not full order + return false; + + // Return the 3-torsion point that R lies above + if (alpha == 0) // Lies above R3 + *ind = 0; + else if (beta == 0) // Lies above S3 + *ind = 1; + else if (alpha + beta == 3) // Lies above R3+S3 + *ind = 3; + else // Lies above R3-S3 + *ind = 2; + + return true; } -void BasePoint3n_decompression(f2elm_t A, const unsigned char r, point_proj_t P) -{ // Deterministic xz-only construction of a point of order 3^n in the Montgomery curve y^2 = x^3 + A*x^2 + x from counter r1. - // Notice that the Elligator 2 counter r was generated beforehand during key compression without linear independence testing - int i; - felm_t a2, b2, N, temp0, temp1; - f2elm_t A2, A24, two = {0}, x, y2, one_fp2 = {0}; - point_proj_t S; - - fpcopy((digit_t*)&Montgomery_one, one_fp2[0]); - fp2div2(A,A2); - fpcopy((digit_t*)&Montgomery_one,two[0]); - fpadd(two[0], two[0], two[0]); - fp2add(A,two,A24); - fp2div2(A24,A24); - fp2div2(A24,A24); - - if ( (r-1) < TABLE_V3_LEN) - fp2mul_mont(A, (felm_t*)&v_3_torsion[r-1], x); // x = A*v; - else // If the index r is out of range, just use a default (0) - fp2mul_mont(A, (felm_t*)&v_3_torsion[0], x); // x = A*v0; - fp2neg(x); // x = -A*v; - fp2add(x,A,y2); // y2 = x + A - fp2mul_mont(x,y2,y2); // y2 = x*(x + A) - fpadd(y2[0], one_fp2[0], y2[0]); // y2 = x(x + A) + 1 - fp2mul_mont(x, y2, y2); // y2 = x*(x^2 + A*x + 1); - fpsqr_mont(y2[0],a2); - fpsqr_mont(y2[1],b2); - fpadd(a2,b2,N); // N := Fp!norm(y2); - - fpcopy(N,temp0); - for (i = 0; i < OALICE_BITS - 2; i++) { - fpsqr_mont(temp0, temp0); - } - for (i = 0; i < OBOB_EXPON; i++) { - fpsqr_mont(temp0, temp1); - fpmul_mont(temp0, temp1, temp0); +static bool SecondPoint_dual(const point_proj_t P, point_full_proj_t R, unsigned char ind) +{ + point_full_proj_t RS3; + f2elm_t gX, gZ; + felm_t zero = {0}; + unsigned long long nbytes = NBITS_TO_NBYTES(NBITS_FIELD); + + // Pair with 3-torsion point determined by first point + fpcopy((digit_t*)B_gen_3_tors + (4*ind + 0)*NWORDS_FIELD, (RS3->X)[0]); + fpcopy((digit_t*)B_gen_3_tors + (4*ind + 1)*NWORDS_FIELD, (RS3->X)[1]); + fpcopy((digit_t*)B_gen_3_tors + (4*ind + 2)*NWORDS_FIELD, (RS3->Y)[0]); + fpcopy((digit_t*)B_gen_3_tors + (4*ind + 3)*NWORDS_FIELD, (RS3->Y)[1]); + + CompletePoint(P, R); + Tate3_proj(RS3, R, gX, gZ); + FinalExpo3(gX, gZ); + + fp2correction(gX); + if (memcmp(gX[1], zero, (size_t)nbytes) != 0) // Not equal to 1 + return true; + else + return false; +} + + +static void FirstPoint3n(const f2elm_t a24, const f2elm_t As[][5], f2elm_t x, point_full_proj_t R, unsigned int *r, unsigned char *ind, unsigned char *bitEll) +{ + bool b = false; + point_proj_t P; + felm_t zero = {0}; + *r = 0; + + while (!b) { + *bitEll = 0; + Elligator2(a24, *r, x, bitEll, COMPRESSION); // Get x-coordinate on curve a24 + + fp2copy(x, P->X); + fpcopy((digit_t*)&Montgomery_one, (P->Z)[0]); + fpcopy(zero, (P->Z)[1]); + eval_full_dual_4_isog(As, P); // Move x over to A = 0 + + b = FirstPoint_dual(P, R, ind); // Compute DLog with 3-torsion points + *r = *r + 1; } - fpsqr_mont(temp0,temp1); // z = N^((p + 1) div 4); - fpcorrection(temp1); - fpcorrection(N); - if (ct_compare((unsigned char*)temp1,(unsigned char*)N,NBITS_TO_NBYTES(NBITS_FIELD)) != 0) { - fp2neg(x); - fp2sub(x,A,x); // x = -x - A; +} + + +static void SecondPoint3n(const f2elm_t a24, const f2elm_t As[][5], f2elm_t x, point_full_proj_t R, unsigned int *r, unsigned char ind, unsigned char *bitEll) +{ + bool b = false; + point_proj_t P; + felm_t zero = {0}; + + while (!b) { + *bitEll = 0; + Elligator2(a24, *r, x, bitEll, COMPRESSION); + + fp2copy(x, P->X); + fpcopy((digit_t*)&Montgomery_one, (P->Z)[0]); + fpcopy(zero, (P->Z)[1]); + eval_full_dual_4_isog(As, P); // Move x over to A = 0 + + b = SecondPoint_dual(P, R, ind); + *r = *r + 1; } - fp2copy(x,S->X); - fp2copy(one_fp2,S->Z); - Double(S,P,A24,OALICE_BITS); // x, z := Double(A24, x, 1, eA); } -void BuildOrdinaryE3nBasis(f2elm_t A, point_full_proj_t R1, point_full_proj_t R2, unsigned int rs[2]) -{ // Generate a basis for the 3^eB torsion - unsigned int r = 0; - f2elm_t t2w1, t1w2; - point_proj_t P, Q, R, S; +static void makeDiff(const point_full_proj_t R, point_full_proj_t S, const point_proj_t D) +{ + f2elm_t t0, t1, t2; + unsigned long long nbytes = NBITS_TO_NBYTES(NBITS_FIELD); + + fp2sub(R->X, S->X, t0); + fp2sub(R->Y, S->Y, t1); + fp2sqr_mont(t0, t0); + fp2sqr_mont(t1, t1); + fp2add(R->X, S->X, t2); + fp2mul_mont(t0, t2, t2); + fp2sub(t1, t2, t1); + fp2mul_mont(D->Z, t1, t1); + fp2mul_mont(D->X, t0, t0); + fp2correction(t0); + fp2correction(t1); + if (memcmp(t0[0], t1[0], (size_t)nbytes) == 0 && memcmp(t0[1], t1[1], (size_t)nbytes) == 0) + fp2neg(S->Y); +} + + +static void BiQuad_affine(const f2elm_t a24, const f2elm_t x0, const f2elm_t x1, point_proj_t R) +{ + f2elm_t Ap2, aa, bb, cc, t0, t1; + + fp2add(a24, a24, Ap2); + fp2add(Ap2, Ap2, Ap2); // Ap2 = a+2 = 4*a24 + + fp2sub(x0, x1, aa); + fp2sqr_mont(aa, aa); + + fp2mul_mont(x0, x1, cc); + fpsub(cc[0], (digit_t*)Montgomery_one, cc[0]); + fp2sqr_mont(cc, cc); + + fpsub(x0[0], (digit_t*)Montgomery_one, bb[0]); + fpcopy(x0[1], bb[1]); + fp2sqr_mont(bb, bb); + fp2mul_mont(Ap2, x0, t0); + fp2add(bb, t0, bb); + fp2mul_mont(x1, bb, bb); + fpsub(x1[0], (digit_t*)Montgomery_one, t0[0]); + fpcopy(x1[1], t0[1]); + fp2sqr_mont(t0, t0); + fp2mul_mont(Ap2, x1, t1); + fp2add(t0, t1, t0); + fp2mul_mont(x0, t0, t0); + fp2add(bb, t0, bb); + fp2add(bb, bb, bb); + + fp2sqr_mont(bb, t0); + fp2mul_mont(aa, cc, t1); + fp2add(t1, t1, t1); + fp2add(t1, t1, t1); + fp2sub(t0, t1, t0); + sqrt_Fp2(t0, t0); + make_positive(t0); // Make the sqrt "positive" + fp2add(bb, t0, R->X); + fp2add(aa, aa, R->Z); +} + + +static void BuildOrdinary3nBasis_dual(const f2elm_t a24, const f2elm_t As[][5], point_full_proj_t *R, unsigned int *r, unsigned int *bitsEll) +{ + point_proj_t D; + f2elm_t xs[2]; + unsigned char ind, bit; + + FirstPoint3n(a24, As, xs[0], R[0], r, &ind, &bit); + *bitsEll = (unsigned int)bit; + *(r+1) = *r; + SecondPoint3n(a24, As, xs[1], R[1], r+1, ind, &bit); + *bitsEll |= ((unsigned int)bit << 1); + + // Get x-coordinate of difference + BiQuad_affine(a24, xs[0], xs[1], D); + eval_full_dual_4_isog(As, D); // Move x over to A = 0 + makeDiff(R[0], R[1], D); +} + + +static void BuildOrdinary3nBasis_Decomp_dual(const f2elm_t A24, point_proj_t *Rs, unsigned char *r, const unsigned char bitsEll) +{ + unsigned char bitEll[2]; - // 1st basis point: - BasePoint3n(A,&r,P,Q); - rs[0] = r; + bitEll[0] = bitsEll & 0x1; + bitEll[1] = (bitsEll >> 1) & 0x1; - // 2nd basis point: + // Elligator2 both x-coordinates + Elligator2(A24, (unsigned int)r[0]-1, Rs[0]->X, &bitEll[0], DECOMPRESSION); + Elligator2(A24, (unsigned int)r[1]-1, Rs[1]->X, &bitEll[1], DECOMPRESSION); + // Get x-coordinate of difference + BiQuad_affine(A24, Rs[0]->X, Rs[1]->X, Rs[2]); +} + + + +static void BuildEntangledXonly(const f2elm_t A, point_proj_t *R, unsigned char *qnr, unsigned char *ind) +{ + felm_t s; + f2elm_t *t_ptr, r, t; + + // Select the correct table + if (is_sqr_fp2(A, s)) { + t_ptr = (f2elm_t *)table_v_qnr; + *qnr = 1; + } else { + t_ptr = (f2elm_t *)table_v_qr; + *qnr = 0; + } + + // Get x0 + *ind = 0; do { - BasePoint3n(A,&r,R,S); - fp2mul_mont(S->X,Q->Z,t2w1); - fp2mul_mont(Q->X,S->Z,t1w2); - fp2correction(t2w1); - fp2correction(t1w2); - } while (ct_compare((unsigned char*)t2w1[0],(unsigned char*)t1w2[0],NBITS_TO_NBYTES(NBITS_FIELD)) == 0 && ct_compare((unsigned char*)t2w1[1],(unsigned char*)t1w2[1],NBITS_TO_NBYTES(NBITS_FIELD)) == 0); // Pr[t2/w2 == t1/w1] = 1/4: E[loop length] = 4/3 - rs[1] = r; - - // NB: ideally the following point completions could share one inversion at the cost of 3 products, but this is not implemented here. - CompleteMPoint(A,P,R1); - CompleteMPoint(A,R,R2); + fp2mul_mont(A, (felm_t *)*t_ptr++, R[0]->X); // R[0]->X = A*v + fp2neg(R[0]->X); // R[0]->X = -A*v + fp2add(R[0]->X, A, t); + fp2mul_mont(R[0]->X, t, t); + fpadd(t[0], (digit_t*)Montgomery_one, t[0]); + fp2mul_mont(R[0]->X, t, t); // t = R[0]->X^3 + A*R[0]->X^2 + R[0]->X + *ind += 1; + } while (!is_sqr_fp2(t, s)); + *ind -= 1; + + if (*qnr) + fpcopy((digit_t*)table_r_qnr[*ind], r[0]); + else + fpcopy((digit_t*)table_r_qr[*ind], r[0]); + + // Get x1 + fp2add(R[0]->X, A, R[1]->X); + fp2neg(R[1]->X); // R[1]->X = -R[0]->X-A + + // Get difference x2, z2 + fp2sub(R[0]->X, R[1]->X, R[2]->Z); + fp2sqr_mont(R[2]->Z, R[2]->Z); + + fpcopy(r[0], r[1]); // (1+i)*ind + fpadd((digit_t*)Montgomery_one, r[0], r[0]); + fp2sqr_mont(r, r); + fp2mul_mont(t, r, R[2]->X); } -void BuildOrdinaryE3nBasis_decompression(f2elm_t A, point_full_proj_t R1, point_full_proj_t R2, const unsigned char r1, const unsigned char r2) -{ // Generate a basis for the 3^eB torsion using shared information - point_proj_t P, R; - - // 1st basis point: - BasePoint3n_decompression(A,r1,P); - // 2nd basis point: - BasePoint3n_decompression(A,r2,R); + +static void BuildOrdinary2nBasis_dual(const f2elm_t A, const f2elm_t Ds[][2], point_full_proj_t *Rs, unsigned char *qnr, unsigned char *ind) +{ + unsigned int i; + felm_t t0; + f2elm_t A6 = {0}; + point_proj_t xs[3] = {0}; + + // Generate x-only entangled basis + BuildEntangledXonly(A, xs, qnr, ind); + fpcopy((digit_t*)Montgomery_one, (xs[0]->Z)[0]); + fpcopy((digit_t*)Montgomery_one, (xs[1]->Z)[0]); + + // Move them back to A = 6 + for(i = 0; i < MAX_Bob; i++) { + eval_3_isog(xs[0], Ds[MAX_Bob-1-i]); + eval_3_isog(xs[1], Ds[MAX_Bob-1-i]); + eval_3_isog(xs[2], Ds[MAX_Bob-1-i]); + } + + // Recover y-coordinates with a single sqrt on A = 6 + fpcopy((digit_t*)Montgomery_one, A6[0]); + fpadd(A6[0], A6[0], t0); + fpadd(t0, t0, A6[0]); + fpadd(A6[0], t0, A6[0]); + + CompleteMPoint(A6, xs[0], Rs[0]); + RecoverY(A6, xs, Rs); +} + + + +static void BuildEntangledXonly_Decomp(const f2elm_t A, point_proj_t *R, unsigned char qnr, unsigned char ind) +{ + f2elm_t *t_ptr, r, t; - // NB: ideally the following point completions could share one inversion at the cost of 3 products, but this is not implemented here. - CompleteMPoint(A,P,R1); - CompleteMPoint(A,R,R2); + // Select the correct table + if ( qnr == 1 ) + t_ptr = (f2elm_t *)table_v_qnr; + else + t_ptr = (f2elm_t *)table_v_qr; + + if (ind >= TABLE_V_LEN/2) + ind = 0; + // Get x0 + fp2mul_mont(A, t_ptr[ind], R[0]->X); // x1 = A*v + fp2neg(R[0]->X); // R[0]->X = -A*v + fp2add(R[0]->X, A, t); + fp2mul_mont(R[0]->X, t, t); + fpadd(t[0], (digit_t*)Montgomery_one, t[0]); + fp2mul_mont(R[0]->X, t, t); // t = R[0]->X^3 + A*R[0]->X^2 + R[0]->X + + if (qnr == 1) + fpcopy((digit_t*)table_r_qnr[ind], r[0]); + else + fpcopy((digit_t*)table_r_qr[ind], r[0]); + + // Get x1 + fp2add(R[0]->X, A, R[1]->X); + fp2neg(R[1]->X); // R[1]->X = -R[0]->X-A + + // Get difference x2,z2 + fp2sub(R[0]->X, R[1]->X, R[2]->Z); + fp2sqr_mont(R[2]->Z, R[2]->Z); + + fpcopy(r[0],r[1]); // (1+i)*ind + fpadd((digit_t*)Montgomery_one, r[0], r[0]); + fp2sqr_mont(r, r); + fp2mul_mont(t, r, R[2]->X); } - diff --git a/src/ec_isogeny.c b/src/ec_isogeny.c index 5b9607e..42ecd66 100644 --- a/src/ec_isogeny.c +++ b/src/ec_isogeny.c @@ -267,33 +267,36 @@ void j_inv(const f2elm_t A, const f2elm_t C, f2elm_t jinv) } -void xDBLADD(point_proj_t P, point_proj_t Q, const f2elm_t xPQ, const f2elm_t A24) +void xDBLADD(point_proj_t P, point_proj_t Q, const f2elm_t XPQ, const f2elm_t ZPQ, const f2elm_t A24) { // Simultaneous doubling and differential addition. // Input: projective Montgomery points P=(XP:ZP) and Q=(XQ:ZQ) such that xP=XP/ZP and xQ=XQ/ZQ, affine difference xPQ=x(P-Q) and Montgomery curve constant A24=(A+2)/4. // Output: projective Montgomery points P <- 2*P = (X2P:Z2P) such that x(2P)=X2P/Z2P, and Q <- P+Q = (XQP:ZQP) such that = x(Q+P)=XQP/ZQP. f2elm_t t0, t1, t2; - mp2_add(P->X, P->Z, t0); // t0 = XP+ZP - mp2_sub_p2(P->X, P->Z, t1); // t1 = XP-ZP - fp2sqr_mont(t0, P->X); // XP = (XP+ZP)^2 - mp2_sub_p2(Q->X, Q->Z, t2); // t2 = XQ-ZQ - mp2_add(Q->X, Q->Z, Q->X); // XQ = XQ+ZQ - fp2mul_mont(t0, t2, t0); // t0 = (XP+ZP)*(XQ-ZQ) - fp2sqr_mont(t1, P->Z); // ZP = (XP-ZP)^2 - fp2mul_mont(t1, Q->X, t1); // t1 = (XP-ZP)*(XQ+ZQ) - mp2_sub_p2(P->X, P->Z, t2); // t2 = (XP+ZP)^2-(XP-ZP)^2 - fp2mul_mont(P->X, P->Z, P->X); // XP = (XP+ZP)^2*(XP-ZP)^2 - fp2mul_mont(A24, t2, Q->X); // XQ = A24*[(XP+ZP)^2-(XP-ZP)^2] - mp2_sub_p2(t0, t1, Q->Z); // ZQ = (XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ) - mp2_add(Q->X, P->Z, P->Z); // ZP = A24*[(XP+ZP)^2-(XP-ZP)^2]+(XP-ZP)^2 - mp2_add(t0, t1, Q->X); // XQ = (XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ) - fp2mul_mont(P->Z, t2, P->Z); // ZP = [A24*[(XP+ZP)^2-(XP-ZP)^2]+(XP-ZP)^2]*[(XP+ZP)^2-(XP-ZP)^2] - fp2sqr_mont(Q->Z, Q->Z); // ZQ = [(XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)]^2 - fp2sqr_mont(Q->X, Q->X); // XQ = [(XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ)]^2 - fp2mul_mont(Q->Z, xPQ, Q->Z); // ZQ = xPQ*[(XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)]^2 + fp2add(P->X, P->Z, t0); // t0 = XP+ZP + fp2sub(P->X, P->Z, t1); // t1 = XP-ZP + fp2sqr_mont(t0, P->X); // XP = (XP+ZP)^2 + fp2sub(Q->X, Q->Z, t2); // t2 = XQ-ZQ + fp2correction(t2); + fp2add(Q->X, Q->Z, Q->X); // XQ = XQ+ZQ + fp2mul_mont(t0, t2, t0); // t0 = (XP+ZP)*(XQ-ZQ) + fp2sqr_mont(t1, P->Z); // ZP = (XP-ZP)^2 + fp2mul_mont(t1, Q->X, t1); // t1 = (XP-ZP)*(XQ+ZQ) + fp2sub(P->X, P->Z, t2); // t2 = (XP+ZP)^2-(XP-ZP)^2 + fp2mul_mont(P->X, P->Z, P->X); // XP = (XP+ZP)^2*(XP-ZP)^2 + fp2mul_mont(t2, A24, Q->X); // XQ = A24*[(XP+ZP)^2-(XP-ZP)^2] + fp2sub(t0, t1, Q->Z); // ZQ = (XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ) + fp2add(Q->X, P->Z, P->Z); // ZP = A24*[(XP+ZP)^2-(XP-ZP)^2]+(XP-ZP)^2 + fp2add(t0, t1, Q->X); // XQ = (XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ) + fp2mul_mont(P->Z, t2, P->Z); // ZP = [A24*[(XP+ZP)^2-(XP-ZP)^2]+(XP-ZP)^2]*[(XP+ZP)^2-(XP-ZP)^2] + fp2sqr_mont(Q->Z, Q->Z); // ZQ = [(XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)]^2 + fp2sqr_mont(Q->X, Q->X); // XQ = [(XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ)]^2 + fp2mul_mont(Q->X, ZPQ, Q->X); // XQ = ZPQ*[(XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ)]^2 + fp2mul_mont(Q->Z, XPQ, Q->Z); // ZQ = XPQ*[(XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)]^2 } + static void swap_points(point_proj_t P, point_proj_t Q, const digit_t option) { // Swap points. // If option = 0 then P <- P and Q <- Q, else if option = 0xFF...FF then P <- Q and Q <- P @@ -354,8 +357,7 @@ static void LADDER3PT(const f2elm_t xP, const f2elm_t xQ, const f2elm_t xPQ, con mask = 0 - (digit_t)swap; swap_points(R, R2, mask); - xDBLADD(R0, R2, R->X, A24); - fp2mul_mont(R2->X, R->Z, R2->X); + xDBLADD(R0, R2, R->X, R->Z, A24); } swap = 0 ^ prevbit; mask = 0 - (digit_t)swap; @@ -364,6 +366,44 @@ static void LADDER3PT(const f2elm_t xP, const f2elm_t xQ, const f2elm_t xPQ, con #ifdef COMPRESS + +static void RecoverY(const f2elm_t A, const point_proj_t *xs, point_full_proj_t *Rs) +{ + f2elm_t t0, t1, t2, t3, t4; + + fp2mul_mont(xs[2]->X, xs[1]->Z, t0); + fp2mul_mont(xs[1]->X, xs[2]->Z, t1); + fp2mul_mont(xs[1]->X, xs[2]->X, t2); + fp2mul_mont(xs[1]->Z, xs[2]->Z, t3); + fp2sqr_mont(xs[1]->X, t4); + fp2sqr_mont(xs[1]->Z, Rs[1]->X); + fp2sub(t2, t3, Rs[1]->Y); + fp2mul_mont(xs[1]->X, Rs[1]->Y, Rs[1]->Y); + fp2add(t4, Rs[1]->X, t4); + fp2mul_mont(xs[2]->Z, t4, t4); + fp2mul_mont(A, t1, Rs[1]->X); + fp2sub(t0, t1, Rs[1]->Z); + + fp2mul_mont(Rs[0]->X, Rs[1]->Z, t0); + fp2add(t2, Rs[1]->X, t1); + fp2add(t1, t1, t1); + fp2sub(t0, t1, t0); + fp2mul_mont(xs[1]->Z, t0, t0); + fp2sub(t0, t4, t0); + fp2mul_mont(Rs[0]->X, t0, t0); + fp2add(t0, Rs[1]->Y, Rs[1]->Y); + fp2mul_mont(Rs[0]->Y, t3, t0); + fp2mul_mont(xs[1]->X, t0, Rs[1]->X); + fp2add(Rs[1]->X, Rs[1]->X, Rs[1]->X); + fp2mul_mont(xs[1]->Z, t0, Rs[1]->Z); + fp2add(Rs[1]->Z, Rs[1]->Z, Rs[1]->Z); + + fp2inv_mont_bingcd(Rs[1]->Z); + fp2mul_mont(Rs[1]->X, Rs[1]->Z, Rs[1]->X); + fp2mul_mont(Rs[1]->Y, Rs[1]->Z, Rs[1]->Y); +} + + static void CompletePoint(const point_proj_t P, point_full_proj_t R) { // Complete point on A = 0 curve f2elm_t xz, s2, r2, yz, invz, t0, t1, one = {0}; @@ -391,7 +431,7 @@ void CompleteMPoint(const f2elm_t A, point_proj_t P, point_full_proj_t R) f2elm_t zero = {0}, one = {0}, xz, yz, s2, r2, invz, temp0, temp1; fpcopy((digit_t*)&Montgomery_one, one[0]); - if (ct_compare((unsigned char*)P->Z[0], (unsigned char*)zero, NBITS_TO_NBYTES(NBITS_FIELD)) != 0 || ct_compare((unsigned char*)P->Z[1], (unsigned char*)zero, NBITS_TO_NBYTES(NBITS_FIELD)) != 0) { + if (memcmp(P->Z[0], zero, NBITS_TO_NBYTES(NBITS_FIELD)) != 0 || memcmp(P->Z[1], zero, NBITS_TO_NBYTES(NBITS_FIELD)) != 0) { fp2mul_mont(P->X, P->Z, xz); // xz = x*z; fpsub(P->X[0], P->Z[1], temp0[0]); fpadd(P->X[1], P->Z[0], temp0[1]); @@ -482,136 +522,6 @@ void xTPLe_fast(point_proj_t P, point_proj_t Q, const f2elm_t A2, int e) } -void ADD(const point_full_proj_t P, const f2elm_t QX, const f2elm_t QY, const f2elm_t QZ, const f2elm_t A, point_full_proj_t R) -{ // General addition. - // Input: projective Montgomery points P=(XP:YP:ZP) and Q=(XQ:YQ:ZQ). - // Output: projective Montgomery point R <- P+Q = (XQP:YQP:ZQP). - f2elm_t t0 = {0}, t1 = {0}, t2 = {0}, t3 = {0}, t4 = {0}, t5 = {0}, t6 = {0}, t7 = {0}; - - fp2mul_mont(QX, P->Z, t0); // t0 = x2*Z1 - fp2mul_mont(P->X, QZ, t1); // t1 = X1*z2 - fp2add(t0, t1, t2); // t2 = t0 + t1 - fp2sub(t1, t0, t3); // t3 = t1 - t0 - fp2mul_mont(QX, P->X, t0); // t0 = x2*X1 - fp2mul_mont(P->Z, QZ, t1); // t1 = Z1*z2 - fp2add(t0, t1, t4); // t4 = t0 + t1 - fp2mul_mont(t0, A, t0); // t0 = t0*A - fp2mul_mont(QY, P->Y, t5); // t5 = y2*Y1 - fp2sub(t0, t5, t0); // t0 = t0 - t5 - fp2mul_mont(t0, t1, t0); // t0 = t0*t1 - fp2add(t0, t0, t0); // t0 = t0 + t0 - fp2mul_mont(t2, t4, t5); // t5 = t2*t4 - fp2add(t5, t0, t5); // t5 = t5 + t0 - fp2sqr_mont(P->X, t0); // t0 = X1 ^ 2 - fp2sqr_mont(P->Z, t6); // t6 = Z1 ^ 2 - fp2add(t0, t6, t0); // t0 = t0 + t6 - fp2add(t1, t1, t1); // t1 = t1 + t1 - fp2mul_mont(QY, P->X, t7); // t7 = y2*X1 - fp2mul_mont(QX, P->Y, t6); // t6 = x2*Y1 - fp2sub(t7, t6, t7); // t7 = t7 - t6 - fp2mul_mont(t1, t7, t1); // t1 = t1*t7 - fp2mul_mont(A, t2, t7); // t7 = A*t2 - fp2add(t7, t4, t4); // t4 = t4 + t7 - fp2mul_mont(t1, t4, t4); // t4 = t1*t4 - fp2mul_mont(QY, QZ, t1); // t1 = y2*z2 - fp2mul_mont(t0, t1, t0); // t0 = t0*t1 - fp2sqr_mont(QZ, t1); // t1 = z2 ^ 2 - fp2sqr_mont(QX, t6); // t6 = x2 ^ 2 - fp2add(t1, t6, t1); // t1 = t1 + t6 - fp2mul_mont(P->Z, P->Y, t6); // t6 = Z1*Y1 - fp2mul_mont(t1, t6, t1); // t1 = t1*t6 - fp2sub(t0, t1, t0); // t0 = t0 - t1 - fp2mul_mont(t2, t0, t0); // t0 = t2*t0 - fp2mul_mont(t5, t3, R->X); // X3 = t5*t3 - fp2add(t4, t0, R->Y); // Y3 = t4 + t0 - fp2sqr_mont(t3, t0); // t0 = t3 ^ 2 - fp2mul_mont(t3, t0, R->Z); // Z3 = t3*t0 -} - - -void Mont_ladder(const f2elm_t x, const digit_t* m, point_proj_t P, point_proj_t Q, const f2elm_t A24, const unsigned int order_bits, const unsigned int order_fullbits) -{ // The Montgomery ladder - // Inputs: the affine x-coordinate of a point P on E: B*y^2=x^3+A*x^2+x, - // scalar m - // curve constant A24 = (A+2)/4 - // order_bits = subgroup order bitlength - // order_fullbits = smallest multiple of 32 larger than the order bitlength - // Output: P = m*(x:1) - unsigned int bit = 0, owords = NBITS_TO_NWORDS(order_fullbits); - digit_t mask, scalar[NWORDS_ORDER]; - int i; - - // Initializing with the points (1:0) and (x:1) - fpcopy((digit_t*)&Montgomery_one, (digit_t*)P->X[0]); - fpzero(P->X[1]); - fp2zero(P->Z); - - fp2copy(x, Q->X); - fpcopy((digit_t*)&Montgomery_one, (digit_t*)Q->Z[0]); - fpzero(Q->Z[1]); - - for (i = NWORDS_ORDER-1; i >= 0; i--) { - scalar[i] = m[i]; - } - - for (i = order_fullbits-order_bits; i > 0; i--) { - mp_shiftl1(scalar, owords); - } - - for (i = order_bits; i > 0; i--) { - bit = (unsigned int)(scalar[owords-1] >> (RADIX-1)); - mp_shiftl1(scalar, owords); - mask = 0-(digit_t)bit; - - swap_points(P, Q, mask); - xDBLADD(P, Q, x, A24); // If bit=0 then P <- 2*P and Q <- P+Q, - swap_points(P, Q, mask); // else if bit=1 then Q <- 2*Q and P <- P+Q - } -} - - -void mont_twodim_scalarmult(digit_t* a, const point_t R, const point_t S, const f2elm_t A, const f2elm_t A24, point_full_proj_t P, const unsigned int order_bits) -{ // Computes P = R + [a]S - point_proj_t P0 = {0}, P1 = {0}; - point_full_proj_t P2 = {0}; - f2elm_t one = {0}; - - fpcopy((digit_t*)&Montgomery_one, one[0]); - Mont_ladder(S->x, a, P0, P1, A24, order_bits, MAXBITS_ORDER); - recover_os(P0->X, P0->Z, P1->X, P1->Z, S->x, S->y, A, P2->X, P2->Y, P2->Z); - ADD(P2, R->x, R->y, one, A, P); -} - - -void xDBLADD_proj(point_proj_t P, point_proj_t Q, const f2elm_t XPQ, const f2elm_t ZPQ, const f2elm_t A24) -{ // Simultaneous doubling and differential addition. - // Input: projective Montgomery points P=(XP:ZP) and Q=(XQ:ZQ) such that xP=XP/ZP and xQ=XQ/ZQ, affine difference xPQ=x(P-Q) and Montgomery curve constant A24=(A+2)/4. - // Output: projective Montgomery points P <- 2*P = (X2P:Z2P) such that x(2P)=X2P/Z2P, and Q <- P+Q = (XQP:ZQP) such that = x(Q+P)=XQP/ZQP. - f2elm_t t0, t1, t2; - - fp2add(P->X, P->Z, t0); // t0 = XP+ZP - fp2sub(P->X, P->Z, t1); // t1 = XP-ZP - fp2sqr_mont(t0, P->X); // XP = (XP+ZP)^2 - fp2sub(Q->X, Q->Z, t2); // t2 = XQ-ZQ - fp2correction(t2); - fp2add(Q->X, Q->Z, Q->X); // XQ = XQ+ZQ - fp2mul_mont(t0, t2, t0); // t0 = (XP+ZP)*(XQ-ZQ) - fp2sqr_mont(t1, P->Z); // ZP = (XP-ZP)^2 - fp2mul_mont(t1, Q->X, t1); // t1 = (XP-ZP)*(XQ+ZQ) - fp2sub(P->X, P->Z, t2); // t2 = (XP+ZP)^2-(XP-ZP)^2 - fp2mul_mont(P->X, P->Z, P->X); // XP = (XP+ZP)^2*(XP-ZP)^2 - fp2mul_mont(t2, A24, Q->X); // XQ = A24*[(XP+ZP)^2-(XP-ZP)^2] - fp2sub(t0, t1, Q->Z); // ZQ = (XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ) - fp2add(Q->X, P->Z, P->Z); // ZP = A24*[(XP+ZP)^2-(XP-ZP)^2]+(XP-ZP)^2 - fp2add(t0, t1, Q->X); // XQ = (XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ) - fp2mul_mont(P->Z, t2, P->Z); // ZP = [A24*[(XP+ZP)^2-(XP-ZP)^2]+(XP-ZP)^2]*[(XP+ZP)^2-(XP-ZP)^2] - fp2sqr_mont(Q->Z, Q->Z); // ZQ = [(XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)]^2 - fp2sqr_mont(Q->X, Q->X); // XQ = [(XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ)]^2 - fp2mul_mont(Q->X, ZPQ, Q->X); // XQ = ZPQ*[(XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ)]^2 - fp2mul_mont(Q->Z, XPQ, Q->Z); // ZQ = XPQ*[(XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)]^2 -} - - void xDBL_e(const point_proj_t P, point_proj_t Q, const f2elm_t A24, const int e) { // Doubling of a Montgomery point in projective coordinates (X:Z) over affine curve coefficient A. // Input: projective Montgomery x-coordinates P = (X1:Z1), where x1=X1/Z1 and Montgomery curve constants (A+2)/4. @@ -635,13 +545,110 @@ void xDBL_e(const point_proj_t P, point_proj_t Q, const f2elm_t A24, const int e } -void Ladder(const point_proj_t P, const digit_t* m, const f2elm_t A, const unsigned int order_bits, point_proj_t R) +static void eval_dual_4_isog_shared(const f2elm_t X4pZ4, const f2elm_t X42, const f2elm_t Z42, f2elm_t* coeff) +{ + fp2sub(X42, Z42, coeff[0]); + fp2add(X42, Z42, coeff[1]); + fp2sqr_mont(X4pZ4, coeff[2]); + fp2sub(coeff[2], coeff[1], coeff[2]); +} + + +static void eval_dual_4_isog(const f2elm_t A24, const f2elm_t C24, const f2elm_t* coeff, point_proj_t P) +{ + f2elm_t t0, t1, t2, t3; + + fp2add(P->X, P->Z, t0); + fp2sub(P->X, P->Z, t1); + fp2sqr_mont(t0, t0); + fp2sqr_mont(t1, t1); + fp2sub(t0, t1, t2); + fp2sub(C24, A24, t3); + fp2mul_mont(t2, t3, t3); + fp2mul_mont(C24, t0, t2); + fp2sub(t2, t3, t2); + fp2mul_mont(t2, t0, P->X); + fp2mul_mont(t3, t1, P->Z); + fp2mul_mont(coeff[0], P->X, P->X); + fp2mul_mont(coeff[1], P->Z, t0); + fp2add(P->X, t0, P->X); + fp2mul_mont(coeff[2], P->Z, P->Z); +} + + +static void get_4_isog_dual(const point_proj_t P, f2elm_t A24, f2elm_t C24, f2elm_t* coeff) +{ + fp2sub(P->X, P->Z, coeff[1]); + fp2add(P->X, P->Z, coeff[2]); + fp2sqr_mont(P->Z, coeff[4]); + fp2add(coeff[4], coeff[4], coeff[0]); + fp2sqr_mont(coeff[0], C24); + fp2add(coeff[0], coeff[0], coeff[0]); + fp2sqr_mont(P->X, coeff[3]); + fp2add(coeff[3], coeff[3], A24); + fp2sqr_mont(A24, A24); +} + + +#if (OALICE_BITS % 2 == 1) + +static void eval_dual_2_isog(const f2elm_t X2, const f2elm_t Z2, point_proj_t P) +{ + f2elm_t t0; + + fp2add(P->X, P->Z, t0); + fp2sub(P->X, P->Z, P->Z); + fp2sqr_mont(t0, t0); + fp2sqr_mont(P->Z, P->Z); + fp2sub(t0, P->Z, P->Z); + fp2mul_mont(X2, P->Z, P->Z); + fp2mul_mont(Z2, t0, P->X); +} + +#endif + + +static void eval_final_dual_2_isog(point_proj_t P) +{ + f2elm_t t0, t1; + felm_t t2; + + fp2add(P->X, P->Z, t0); + fp2mul_mont(P->X, P->Z, t1); + fp2sqr_mont(t0, P->X); + fpcopy((P->X)[0], t2); + fpcopy((P->X)[1], (P->X)[0]); + fpcopy(t2, (P->X)[1]); + fpneg((P->X)[1]); + fp2add(t1, t1, P->Z); + fp2add(P->Z, P->Z, P->Z); +} + + +static void eval_full_dual_4_isog(const f2elm_t As[][5], point_proj_t P) { + // First all 4-isogenies + for(unsigned int i = 0; i < MAX_Alice; i++) { + eval_dual_4_isog(As[MAX_Alice-i][0], As[MAX_Alice-i][1], *(As+MAX_Alice-i-1)+2, P); + } +#if (OALICE_BITS % 2 == 1) + eval_dual_2_isog(As[MAX_Alice][2], As[MAX_Alice][3], P); +#endif + eval_final_dual_2_isog(P); // to A = 0 +} + + +void Ladder(const point_proj_t P, const digit_t* m, const f2elm_t A, const unsigned int order_bits, point_proj_t R) +{ // The Montgomery ladder + // Inputs: a projective point P=[X:Z] on E: B*y^2=x^3+A*x^2+x, + // scalar m + // curve coefficient A + // order_bits = subgroup order bitlength + // Output: R = m*(X:Z) point_proj_t R0, R1; f2elm_t A24 = {0}; - unsigned int bit = 0; digit_t mask; - int j, swap, prevbit = 0; + unsigned int bit = 0, prevbit = 0, j, swap; fpcopy((digit_t*)&Montgomery_one, A24[0]); fpadd(A24[0], A24[0], A24[0]); @@ -665,7 +672,7 @@ void Ladder(const point_proj_t P, const digit_t* m, const f2elm_t A, const unsig mask = 0 - (digit_t)swap; swap_points(R0, R1, mask); - xDBLADD_proj(R0, R1, P->X, P->Z, A24); + xDBLADD(R0, R1, P->X, P->Z, A24); } swap = 0 ^ prevbit; mask = 0 - (digit_t)swap; @@ -675,4 +682,39 @@ void Ladder(const point_proj_t P, const digit_t* m, const f2elm_t A, const unsig fp2copy(R0->Z, R->Z); } + +static void Ladder3pt_dual(const point_proj_t *Rs, const digit_t* m, const unsigned int AliceOrBob, point_proj_t R, const f2elm_t A24) +{ // Project 3-point ladder + point_proj_t R0 = {0}, R2 = {0}; + digit_t mask; + int i, nbits, bit, swap, prevbit = 0; + + if (AliceOrBob == ALICE) { + nbits = OALICE_BITS; + } else { + nbits = OBOB_BITS; + } + + fp2copy(Rs[1]->X, R0->X); + fp2copy(Rs[1]->Z, R0->Z); + fp2copy(Rs[2]->X, R2->X); + fp2copy(Rs[2]->Z, R2->Z); + fp2copy(Rs[0]->X, R->X); + fp2copy(Rs[0]->Z, R->Z); + + // Main loop + for (i = 0; i < nbits; i++) { + bit = (m[i >> LOG2RADIX] >> (i & (RADIX-1))) & 1; + swap = bit ^ prevbit; + prevbit = bit; + mask = 0 - (digit_t)swap; + + swap_points(R, R2, mask); + xDBLADD(R0, R2, R->X, R->Z, A24); + } + swap = 0 ^ prevbit; + mask = 0 - (digit_t)swap; + swap_points(R, R2, mask); +} + #endif \ No newline at end of file diff --git a/src/fpx.c b/src/fpx.c index 722f40a..72629f8 100644 --- a/src/fpx.c +++ b/src/fpx.c @@ -969,7 +969,7 @@ unsigned char is_sqr_fp2(const f2elm_t a, felm_t s) fpsqr_mont(s,temp); // s = z^((p+1)/4) fpcorrection(temp); fpcorrection(z); - if (ct_compare((unsigned char*)temp, (unsigned char*)z, NBITS_TO_NBYTES(NBITS_FIELD)) != 0) // s^2 !=? z + if (memcmp(temp, z, NBITS_TO_NBYTES(NBITS_FIELD)) != 0) // s^2 !=? z return 0; return 1; @@ -1004,7 +1004,7 @@ void sqrt_Fp2(const f2elm_t u, f2elm_t y) fpcorrection(t0); fpcorrection(t3); - if (ct_compare((unsigned char*)t0, (unsigned char*)t3, NBITS_TO_NBYTES(NBITS_FIELD)) == 0) { + if (memcmp(t0, t3, NBITS_TO_NBYTES(NBITS_FIELD)) == 0) { fpcopy(t1, y[0]); fpcopy(t2, y[1]); } else { diff --git a/src/internal.h b/src/internal.h index 7d0ceec..924f246 100644 --- a/src/internal.h +++ b/src/internal.h @@ -70,7 +70,7 @@ void mont_n_way_inv(const f2elm_t* vec, const int n, f2elm_t* out); void j_inv(const f2elm_t A, const f2elm_t C, f2elm_t jinv); // Simultaneous doubling and differential addition. -void xDBLADD(point_proj_t P, point_proj_t Q, const f2elm_t xPQ, const f2elm_t A24); +void xDBLADD(point_proj_t P, point_proj_t Q, const f2elm_t XPQ, const f2elm_t ZPQ, const f2elm_t A24); // Doubling of a Montgomery point in projective coordinates (X:Z). void xDBL(const point_proj_t P, point_proj_t Q, const f2elm_t A24plus, const f2elm_t C24);