From 5852de4d05ab791b26ed8c236cfca57fd1ee894f Mon Sep 17 00:00:00 2001
From: geovandro <geovandro.pereira@gmail.com>
Date: Thu, 4 Feb 2021 03:04:12 -0300
Subject: [PATCH] Remove unused functions. Revert to memcmp where constant-time
 not needed. Organize compression files.

---
 src/compression/dlog.c            |  48 +--
 src/compression/pairing.c         | 333 --------------
 src/compression/sidh_compressed.c | 649 +---------------------------
 src/compression/torsion_basis.c   | 692 +++++++++++++++++++-----------
 src/ec_isogeny.c                  | 354 ++++++++-------
 src/fpx.c                         |   4 +-
 src/internal.h                    |   2 +-
 7 files changed, 667 insertions(+), 1415 deletions(-)

diff --git a/src/compression/dlog.c b/src/compression/dlog.c
index 4e113d5..36786db 100644
--- a/src/compression/dlog.c
+++ b/src/compression/dlog.c
@@ -8,7 +8,7 @@
 
 void from_base(int *D, digit_t *r, int Dlen, int base) 
 { // Convert a number in base "base" with signed digits: (D[k-1]D[k-2]...D[1]D[0])_base < 2^(NWORDS_ORDER*RADIX) into decimal 
-  // Output: r = D[k-1]*base^(k-1) + ... + D[1]*base + D[0] 
+  // Output: r = (D[k-1]*base^(k-1) + ... + D[1]*base + D[0])_10
     digit_t ell[NWORDS_ORDER] = {0}, digit[NWORDS_ORDER] = {0}, temp[NWORDS_ORDER] = {0};    
     int ellw;
     
@@ -107,11 +107,11 @@ int ord2w_dlog(const felm_t *r, const int *logT, const felm_t *Texp)
 
     if (is_felm_zero(y)) return 0;
     if (is_felm_zero(x)) return logT[0];
-    if (ct_compare((unsigned char *)x, (unsigned char *)y, NBITS_TO_NBYTES(NBITS_FIELD)) == 0) return logT[1];
+    if (memcmp(x, y, NBITS_TO_NBYTES(NBITS_FIELD)) == 0) return logT[1];
     fpcopy(y, sum);
     fpneg(sum);
     fpcorrection(sum);
-    if (ct_compare((unsigned char *)x, (unsigned char *)sum, NBITS_TO_NBYTES(NBITS_FIELD)) == 0) return logT[2];    
+    if (memcmp(x, sum, NBITS_TO_NBYTES(NBITS_FIELD)) == 0) return logT[2];    
     for (int j = 2; j < W_2; ++j)
     {
         for (int i = 0; i < (1<<(j-1)); ++i)
@@ -125,12 +125,12 @@ int ord2w_dlog(const felm_t *r, const int *logT, const felm_t *Texp)
                     fpsub(sum, prods[(1<<k) + (i >> (j-k-1)) - 1], sum);
             }
             fpcorrection(sum);
-            if (ct_compare((unsigned char *)x, (unsigned char *)sum, NBITS_TO_NBYTES(NBITS_FIELD)) == 0) {
+            if (memcmp(x, sum, NBITS_TO_NBYTES(NBITS_FIELD)) == 0) {
                 return logT[(1<<j)+i-1];
             }
             fpneg(sum);
             fpcorrection(sum);
-            if (ct_compare((unsigned char *)x, (unsigned char *)sum, NBITS_TO_NBYTES(NBITS_FIELD)) == 0) {
+            if (memcmp(x, sum, NBITS_TO_NBYTES(NBITS_FIELD)) == 0) {
                 return logT[(1<<(j+1))-i-1-1];
             }
         }
@@ -181,8 +181,8 @@ int ord2w_dloghyb(const felm_t *h, const int *logT, const felm_t *Texp, const fe
     if (is_felm_zero(H[0][0])) { // check if compressed Fp2 element H[0] is -1
         fpneg(one);
         fpcorrection(one);
-        if ( ((ct_compare((unsigned char *)G[0],(unsigned char *)&Montgomery_one,NBITS_TO_NBYTES(NBITS_FIELD)) == 0) && (ct_compare((unsigned char *)H[1][0],(unsigned char *)H[1][1],NBITS_TO_NBYTES(NBITS_FIELD)) != 0)) ||
-             ((ct_compare((unsigned char *)G[0],(unsigned char *)one,NBITS_TO_NBYTES(NBITS_FIELD)) == 0) && (ct_compare((unsigned char *)H[1][0],(unsigned char *)H[1][1],NBITS_TO_NBYTES(NBITS_FIELD)) == 0))) { // check if G[0] != H[1]
+        if ( ((memcmp(G[0],&Montgomery_one,NBITS_TO_NBYTES(NBITS_FIELD)) == 0) && (memcmp(H[1][0],H[1][1],NBITS_TO_NBYTES(NBITS_FIELD)) != 0)) ||
+             ((memcmp(G[0],one,NBITS_TO_NBYTES(NBITS_FIELD)) == 0) && (memcmp(H[1][0],H[1][1],NBITS_TO_NBYTES(NBITS_FIELD)) == 0))) { // check if G[0] != H[1]
             for (int i = 0; i <= k; ++i) inv_Fp2_cycl_proj(H[i]);
             inv = 1;
         }
@@ -205,7 +205,7 @@ int ord2w_dloghyb(const felm_t *h, const int *logT, const felm_t *Texp, const fe
             fpmul_mont(G[(1 << (j-index+ord-2)) - (1 << (j-index+ord-3)) + (i_j >> 1)], H[j][1], one);
         }
         fpcorrection(one);
-        if (ct_compare((unsigned char *)H[j][0], (unsigned char *)one, NBITS_TO_NBYTES(NBITS_FIELD)) != 0) {
+        if (memcmp(H[j][0], one, NBITS_TO_NBYTES(NBITS_FIELD)) != 0) {
             d += 1 << (w-1);
             i_j++;
             flag = 1;
@@ -296,18 +296,18 @@ void Traverse_w_div_e_fullsigned(const f2elm_t r, int j, int k, int z, const uns
         fp2copy(r, rp);
         fp2correction(rp);
 
-        if (is_felm_zero(rp[1]) && ct_compare((unsigned char *)rp[0],(unsigned char *)&Montgomery_one,NBITS_TO_NBYTES(NBITS_FIELD)) == 0) {
+        if (is_felm_zero(rp[1]) && memcmp(rp[0],&Montgomery_one,NBITS_TO_NBYTES(NBITS_FIELD)) == 0) {
             D[k] = 0;
         } else {
             for (int t = 1; t <= ellw/2; t++) {
-                if (ct_compare((unsigned char*)rp, (unsigned char*)&CT[2*((Dlen - 1)*(ellw/2) + (t-1))], 2*NBITS_TO_NBYTES(NBITS_FIELD)) == 0) {
+                if (memcmp(rp, CT[2*((Dlen - 1)*(ellw/2) + (t-1))], 2*NBITS_TO_NBYTES(NBITS_FIELD)) == 0) {
                     D[k] = -t;
                     break;
                 } else {                    
                     fp2copy(CT + 2*((Dlen - 1)*(ellw/2) + (t-1)), alpha);
                     fpneg(alpha[1]);
                     fpcorrection(alpha[1]);
-                    if (ct_compare((unsigned char*)rp, (unsigned char*)alpha, 2*NBITS_TO_NBYTES(NBITS_FIELD)) == 0) {
+                    if (memcmp(rp, alpha, 2*NBITS_TO_NBYTES(NBITS_FIELD)) == 0) {
                         D[k] = t;
                         break;
                     }
@@ -367,19 +367,19 @@ void Traverse_w_notdiv_e_fullsigned(const f2elm_t r, int j, int k, int z, const
         fp2copy(r, rp);
         fp2correction(rp);    
 
-        if (is_felm_zero(rp[1]) && ct_compare((unsigned char *)rp[0],(unsigned char *)&Montgomery_one,NBITS_TO_NBYTES(NBITS_FIELD)) == 0) {
+        if (is_felm_zero(rp[1]) && memcmp(rp[0],&Montgomery_one,NBITS_TO_NBYTES(NBITS_FIELD)) == 0) {
             D[k] = 0;              
         } else {            
             if (!(j == 0 && k == Dlen - 1)) {
                 for (int t = 1; t <= (ellw/2); t++) {
-                    if (ct_compare((unsigned char*)&CT2[2*(ellw/2)*(Dlen-1) + 2*(t-1)], (unsigned char*)rp, 2*NBITS_TO_NBYTES(NBITS_FIELD)) == 0) {
+                    if (memcmp(CT2[2*(ellw/2)*(Dlen-1) + 2*(t-1)], rp, 2*NBITS_TO_NBYTES(NBITS_FIELD)) == 0) {
                         D[k] = -t;
                         break;             
                     } else {
                         fp2copy(CT2 + 2*((ellw/2)*(Dlen-1) + (t-1)), alpha);
                         fpneg(alpha[1]);
                         fpcorrection(alpha[1]);
-                        if (ct_compare((unsigned char*)rp, (unsigned char*)alpha, 2*NBITS_TO_NBYTES(NBITS_FIELD)) == 0) {
+                        if (memcmp(rp, alpha, 2*NBITS_TO_NBYTES(NBITS_FIELD)) == 0) {
                             D[k] = t;
                             break;
                         }
@@ -387,14 +387,14 @@ void Traverse_w_notdiv_e_fullsigned(const f2elm_t r, int j, int k, int z, const
                 }
             } else {            
                 for (int t = 1; t <= ell_emodw/2; t++) {     
-                    if (ct_compare((unsigned char*)&CT1[2*(ellw/2)*(Dlen - 1) + 2*(t-1)], (unsigned char*)rp, 2*NBITS_TO_NBYTES(NBITS_FIELD)) == 0) { 
+                    if (memcmp(CT1[2*(ellw/2)*(Dlen - 1) + 2*(t-1)], rp, 2*NBITS_TO_NBYTES(NBITS_FIELD)) == 0) { 
                         D[k] = -t;
                         break;                
                     } else {
                         fp2copy(CT1 + 2*((ellw/2)*(Dlen-1) + (t-1)), alpha);
                         fpneg(alpha[1]);
                         fpcorrection(alpha[1]);
-                        if (ct_compare((unsigned char*)rp, (unsigned char*)alpha, 2*NBITS_TO_NBYTES(NBITS_FIELD)) == 0) {
+                        if (memcmp(rp, alpha, 2*NBITS_TO_NBYTES(NBITS_FIELD)) == 0) {
                             D[k] = t;
                             break;
                         }                        
@@ -430,19 +430,3 @@ void solve_dlog(const f2elm_t r, int *D, digit_t* d, int ell)
 }
 
 
-void ph2(const point_full_proj_t phiP, const point_full_proj_t phiQ, const point_t PS, const point_t QS, const f2elm_t A, digit_t* c0, digit_t* d0, digit_t* c1, digit_t* d1)
-{ // Computes the 4 coefficients of the change of basis matrix between the bases {phiP,phiQ} and {PS, QS}
-  // Assume both bases generate the full 2^eA torsion
-    f2elm_t n[4] = {0};    
-    int D[DLEN_2];
-    
-    // Compute the four pairings
-    Tate_4_pairings_2_torsion(phiP, phiQ, PS, QS, A, n);   
-    
-    solve_dlog(n[0], D, d0, 2);    
-    solve_dlog(n[2], D, c0, 2);
-    mp_sub((digit_t*)Alice_order, c0, c0, NWORDS_ORDER);    
-    solve_dlog(n[1], D, d1, 2);    
-    solve_dlog(n[3], D, c1, 2);
-    mp_sub((digit_t*)Alice_order, c1, c1, NWORDS_ORDER); 
-}
\ No newline at end of file
diff --git a/src/compression/pairing.c b/src/compression/pairing.c
index 09132a5..23e1374 100644
--- a/src/compression/pairing.c
+++ b/src/compression/pairing.c
@@ -7,51 +7,6 @@
 
 #define t_points  2
 
-void get_A_compression(const f2elm_t xP, const f2elm_t xQ, const f2elm_t xR, f2elm_t A)
-{ // Given the x-coordinates of P, Q, and R, returns the value A corresponding to the Montgomery curve E_A: y^2=x^3+A*x^2+x such that R=Q-P on E_A.
-  // Input:  the x-coordinates xP, xQ, and xR of the points P, Q and R.
-  // Output: the coefficient A corresponding to the curve E_A: y^2=x^3+A*x^2+x.
-    f2elm_t t0, t1, one = {0};
-    
-    fpcopy((digit_t*)&Montgomery_one, one[0]);
-    fp2add(xP, xQ, t1);                           // t1 = xP+xQ
-    fp2mul_mont(xP, xQ, t0);                      // t0 = xP*xQ
-    fp2mul_mont(xR, t1, A);                       // A = xR*t1
-    fp2add(t0, A, A);                             // A = A+t0
-    fp2mul_mont(t0, xR, t0);                      // t0 = t0*xR
-    fp2sub(A, one, A);                            // A = A-1
-    fp2add(t0, t0, t0);                           // t0 = t0+t0
-    fp2add(t1, xR, t1);                           // t1 = t1+xR
-    fp2add(t0, t0, t0);                           // t0 = t0+t0
-    fp2sqr_mont(A, A);                            // A = A^2
-    fp2inv_mont_bingcd(t0);                       // t0 = 1/t0
-    fp2mul_mont(A, t0, A);                        // A = A*t0
-    fp2sub(A, t1, A);                             // Afinal = A-t1
-}
-
-
-void recover_y(const publickey_t PK, point_full_proj_t phiP, point_full_proj_t phiQ, point_full_proj_t phiX, f2elm_t A)
-{ // Recover the y-coordinates of the public key
-  // The three resulting points are (simultaneously) correct up to sign
-    f2elm_t tmp, phiXY, one = {0};
-
-    fpcopy((digit_t*)&Montgomery_one, one[0]);
-    get_A_compression(PK[0], PK[1], PK[2], A);  // NOTE: don't have to compress this, can output in keygen
-    
-    fp2add(PK[2], A, tmp);
-    fp2mul_mont(PK[2], tmp, tmp);
-    fp2add(tmp, one, tmp);                 
-    fp2mul_mont(PK[2], tmp, tmp);               // tmp = PK[2]^3+A*PK[2]^2+PK[2];
-    sqrt_Fp2(tmp, phiXY);  
-    fp2copy(PK[2], phiX->X);
-    fp2copy(phiXY, phiX->Y);
-    fp2copy(one, phiX->Z);                      // phiX = [PK[2],phiXY,1];
-    
-    recover_os(PK[1], one, PK[0], one, PK[2], phiXY, A, phiQ->X, phiQ->Y, phiQ->Z);      
-    fp2neg(phiXY); 
-    recover_os(PK[0], one, PK[1], one, PK[2], phiXY, A, phiP->X, phiP->Y, phiP->Z);      
-}
-
 
 static void final_exponentiation_2_torsion(f2elm_t f, const f2elm_t finv, f2elm_t fout)
 { // The final exponentiation for pairings in the 2^eA-torsion group. Raising the value f to the power (p^2-1)/2^eA.
@@ -72,171 +27,6 @@ static void final_exponentiation_2_torsion(f2elm_t f, const f2elm_t finv, f2elm_
 }
 
 
-void Tate_pairings_2_torsion(const point_full_proj_t P, point_full_proj_t *Qj, f2elm_t a, f2elm_t* n)
-{ // Compute the reduced Tate pairings e_{2^m}(P, Q_j) for the curve y^2 = x^3 + a*x + b:
-    f2elm_t h[t_points], one = {0};
-    f2elm_t X, Y, Z, X2, Y2, Y4, M, S, T, temp;
-    f2elm_t Xp, Yp, Zp, Tp;
-    f2elm_t L, W, g;
-    
-    fpcopy((digit_t*)&Montgomery_one, one[0]);
-    fp2copy(P->X, X);
-    fp2copy(P->Y, Y);
-    fp2copy(P->Z, Z);
-    fp2sqr_mont(Z, T);
-
-    for (int j = 0; j < t_points; j++) {
-        fp2copy(one, n[j]);
-        fp2mul_mont(T, Qj[j]->X, temp);
-        fp2sub(temp, X, h[j]);
-    }
-    
-    for (int k = 0; k < OALICE_BITS; k++) {
-        // Point doubling and line function construction:
-        fp2sqr_mont(X, X2);
-        fp2sqr_mont(Y, Y2);
-        fp2sqr_mont(Y2, Y4);
-        fp2sqr_mont(T, temp);
-        fp2mul_mont(a, temp, temp);
-        fp2add(X2, X2, M);
-        fp2add(M, X2, M);
-        fp2add(M, temp, M);      // M = 3*X_2 + a*T^2
-        fp2add(X, Y2, S);
-        fp2sqr_mont(S, S);
-        fp2sub(S, X2, S);
-        fp2sub(S, Y4, S);
-        fp2add(S, S, S);         // S = 2*((X + Y2)^2 - X2 - Y4)     
-        fp2sqr_mont(M, temp);
-        fp2add(S, S, Xp);
-        fp2sub(temp, Xp, Xp);    // Xp = M^2 - 2*S
-        fp2sub(S, Xp, temp);
-        fp2mul_mont(M, temp, temp);
-        fp2shl(Y4, 3, Yp);
-        fp2sub(temp, Yp, Yp);    // Yp = M*(S - Xp) - 8*Y4
-        fp2add(Y, Z, temp);
-        fp2sqr_mont(temp, temp);
-        fp2sub(temp, Y2, temp);
-        fp2sub(temp, T, Zp);     // Zp = (Y + Z)^2 - Y2 - T
-        fp2sqr_mont(Zp, Tp);     // Tp = Zp^2      
-        fp2mul_mont(Zp, T, L);   // L = Zp*T
-        fp2add(Y2, Y2, W);       // W = 2*Y2
-        
-        fp2correction(Zp);
-        if (is_felm_zero(Zp[0]) && is_felm_zero(Zp[1])) {  // Doubling exception for points in 2*E
-            fp2copy(one, Xp);
-            fp2copy(one, Yp);            
-        }
-
-        // Line function evaluation and accumulation:
-        for (int j = 0; j < t_points; j++) {
-            if (!is_felm_zero(Zp[0]) || !is_felm_zero(Zp[1])) {
-                fp2mul_mont(M, h[j], temp);
-                fp2add(temp, W, temp);
-                fp2mul_mont(L, Qj[j]->Y, g);
-                fp2sub(temp, g, g);              // g = M*hj + W - L*Y_{Qj}
-                fp2mul_mont(Tp, Qj[j]->X, temp);
-                fp2sub(temp, Xp, h[j]);          // hj = Tp*X_{Qj} - Xp
-                fp2_conj(h[j], temp);
-                fp2mul_mont(temp, g, g);         // g = g*hj^*
-            } else {
-                fp2copy(h[j], g);
-            }
-            fp2sqr_mont(n[j], n[j]);
-            fp2mul_mont(n[j], g, n[j]);
-        }
-        fp2copy(Xp, X);
-        fp2copy(Yp, Y);
-        fp2copy(Zp, Z);
-        fp2copy(Tp, T);
-    }
-    
-    // Final exponentiation:
-    mont_n_way_inv(n, t_points, h);
-    for (int j = 0; j < t_points; j++) {
-        fp2correction(Qj[j]->Z);
-        if (is_felm_zero(Qj[j]->Z[0]) && is_felm_zero(Qj[j]->Z[1]))
-            fp2copy(one, n[j]);
-        else {            
-            final_exponentiation_2_torsion(n[j], h[j], n[j]);
-        }
-    }
-}
-
-
-void Monty2Weier(const f2elm_t A, f2elm_t a, f2elm_t b)
-{ // Convert a Montgomery curve EM: y^2 = x^3 + A*x^2 + x into its short Weierstrass form EW: v^2 = u^3 + a*u + b.
-    f2elm_t one = {0}, temp, A2, AA;
-    
-    fpcopy((digit_t*)&Montgomery_one, one[0]);
-    // a = 1 - A^2/3
-    fp2sqr_mont(A, A2);
-    fpmul_mont(A2[0], (digit_t*)threeinv, temp[0]);
-    fpmul_mont(A2[1], (digit_t*)threeinv, temp[1]);
-    fp2sub(one, temp, a);
-    
-    // b = (2*A^3 - 9*A)/27
-    fp2add(A, A, AA);           // AA = 2*A
-    fp2mul_mont(AA, A2, temp);  // temp = 2A^3
-    fp2add(AA, AA, A2);         // A2 = 4*A
-    fp2add(A2, A2, A2);         // A2 = 8*A
-    fp2add(A2, A, A2);          // A2 = 9*A
-    fp2sub(temp, A2, b);        // b = 2A^3 - 9*A
-    fpmul_mont((digit_t*)threeinv, (digit_t*)threeinv, temp[0]);
-    fpmul_mont(temp[0], (digit_t*)threeinv, temp[0]);
-    fpmul_mont(temp[0], b[0], b[0]);
-    fpmul_mont(temp[0], b[1], b[1]);
-}
-
-
-void PointMonty2Weier(const point_full_proj_t PM, point_full_proj_t PW, const f2elm_t A)
-{ // Convert a point PM on a Montgomery curve y^2 = x^3 + A*x^2 + x into the corresponding point on its short Weierstrass form EW.
-    f2elm_t zero = {0}, one = {0}, temp;
-    
-    fpcopy((digit_t*)&Montgomery_one, one[0]);
-    
-    if (is_felm_zero(PM->Z[0]) && is_felm_zero(PM->Z[1])) {
-        fp2copy(zero,PW->X);
-        fp2copy(one, PW->Y);
-        fp2copy(zero,PW->Z);
-        return;
-    }
-    fpmul_mont((digit_t*)threeinv, A[0], temp[0]);
-    fpmul_mont((digit_t*)threeinv, A[1], temp[1]);
-    
-    // PW = EW(PM->X + A/3, PM->Y, 1)
-    fp2add(temp, PM->X, PW->X);
-    fp2copy(PM->Y, PW->Y);
-    fp2copy(one, PW->Z);
-}
-
-
-void Tate_4_pairings_2_torsion(const point_full_proj_t P, const point_full_proj_t Q, const point_t S1, const point_t S2, const f2elm_t A, f2elm_t* n)
-{ // The doubling only 2-torsion Tate pairing of order 2^eA, consisting of the doubling only Miller loop and the final exponentiation.]
-  // Computes 4 pairings at once: e(P, S1), e(P, S2), e(Q, S1), e(Q, S2).
-    point_full_proj_t Qj[2], PW, QW, QjW[2];
-    f2elm_t a, b, one = {0};
-    
-    fpcopy((digit_t*)&Montgomery_one, one[0]);    
-    Monty2Weier(A, a, b);
-    
-    // Assume S1 and S2 are normalized
-    fp2copy(S1->x, Qj[0]->X);
-    fp2copy(S1->y, Qj[0]->Y);
-    fp2copy(one, Qj[0]->Z);
-    fp2copy(S2->x, Qj[1]->X);
-    fp2copy(S2->y, Qj[1]->Y);
-    fp2copy(one, Qj[1]->Z);
-    
-    PointMonty2Weier(P, PW, A);
-    PointMonty2Weier(Q, QW, A);
-    PointMonty2Weier(Qj[0], QjW[0], A);
-    PointMonty2Weier(Qj[1], QjW[1], A);
-    
-    Tate_pairings_2_torsion(PW, QjW, a, n);
-    Tate_pairings_2_torsion(QW, QjW, a, n + 2);
-}
-
-
 static void final_exponentiation_3_torsion(f2elm_t f, const f2elm_t finv, f2elm_t fout)
 { // The final exponentiation for pairings in the 3-torsion group. Raising the value f to the power (p^2-1)/3^eB.
     felm_t one = {0};
@@ -256,129 +46,6 @@ static void final_exponentiation_3_torsion(f2elm_t f, const f2elm_t finv, f2elm_
 }
 
 
-void Tate_pairings_3_torsion(const point_full_proj_t P, point_full_proj_t *Qj, f2elm_t a, f2elm_t* n)
-{ // The tripling only 3-torsion Tate pairing of order 3^eB, consisting of the tripling only Miller loop and the final exponentiation. 
-  // Computes 4 pairings at once: e(P, R1), e(P, R2), e(Q, R1), e(Q, R2).
-    f2elm_t h[t_points], one = {0};
-    f2elm_t X, Y, Z, X2, Y2, Y4, M, S, T;
-    f2elm_t Xp, Yp, Zp, Tp, D, U, Up, Fp;
-    f2elm_t L, W, Wp, g, T2, M2, F, F2, d;
-    f2elm_t temp, temp1;
-    
-    fpcopy((digit_t*)&Montgomery_one, one[0]);
-    fp2copy(P->X, X);
-    fp2copy(P->Y, Y);
-    fp2copy(P->Z, Z);
-    fp2sqr_mont(Z, T);
-
-    for (int j = 0; j < t_points; j++) {
-        fp2copy(one, n[j]);
-        fp2mul_mont(T, Qj[j]->X, temp);
-        fp2sub(temp, X, h[j]);
-    }    
-    
-    for (int k = 0; k < OBOB_EXPON; k++) {
-        // Point tripling and parabola function construction:        
-        fp2sqr_mont(X, X2);
-        fp2sqr_mont(Y, Y2);
-        fp2sqr_mont(Y2, Y4);
-        fp2sqr_mont(T, T2);
-        fp2add(X2, X2, temp);
-        fp2add(temp, X2, temp);
-        fp2mul_mont(a, T2, M);
-        fp2add(temp, M, M);      // M = 3*X2 + a*T2
-        fp2sqr_mont(M, M2);
-        fp2add(X, Y2, temp);
-        fp2sqr_mont(temp, temp);
-        fp2sub(temp, X2, temp);
-        fp2sub(temp, Y4, D);     // D = (X + Y2)^2 - X2 - Y4
-        fp2add(D, D, temp);
-        fp2add(temp, D, temp);
-        fp2add(temp, temp, temp);
-        fp2sub(temp, M2, F);     // F = 6*D - M2
-        fp2sqr_mont(F, F2);
-        fp2add(Y2, Y2, W);
-        fp2add(W, W, Wp);
-        fp2shl(Y4, 4, S);
-        fp2add(M, F, temp);
-        fp2sqr_mont(temp, temp);
-        fp2sub(temp, M2, temp);
-        fp2sub(temp, F2, temp);
-        fp2sub(temp, S, U);      // U = (M + F)^2 - M2 - F2 - S
-        fp2sub(S, U, Up);
-        fp2mul_mont(X, F2, temp);
-        fp2mul_mont(Wp, U, Xp);       
-        fp2sub(temp, Xp, Xp);        
-        fp2shl(Xp, 2, Xp);       // Xp = 4*(X*F2 - Wp*U)       
-        fp2mul_mont(U, Up, temp);
-        fp2mul_mont(F, F2, Yp);
-        fp2sub(temp, Yp, Yp);
-        fp2mul_mont(Y, Yp, Yp);
-        fp2shl(Yp, 3, Yp);       // Yp = 8*Y*(U*Up - F*F2)
-        fp2add(Z, F, temp);
-        fp2sqr_mont(temp, temp);
-        fp2sub(temp, T, temp);
-        fp2sub(temp, F2, Zp);    // Zp = (Z + F)^2 - T - F2
-        fp2sqr_mont(Zp, Tp);             
-        fp2add(Y, Z, temp);
-        fp2sqr_mont(temp, temp);
-        fp2sub(temp, Y2, temp);
-        fp2sub(temp, T, temp);     
-        fp2mul_mont(temp, T, L);
-        fp2add(F, F, Fp);       
-        
-        fp2correction(Zp);
-        if (is_felm_zero(Zp[0]) && is_felm_zero(Zp[1])) {
-            fp2copy(one, Xp);
-            fp2copy(one, Yp);
-        }
-        
-        // Parabola function evaluation and accumulation
-        for (int j = 0; j < t_points; j++) {
-            fp2mul_mont(L, Qj[j]->Y, temp);
-            fp2sub(W, temp, d);       
-            if (!is_felm_zero(Zp[0]) || !is_felm_zero(Zp[1])) {
-                fp2mul_mont(M, h[j], temp);                 
-                fp2add(d, temp, g);          // g = (M*h + d)           
-                fp2mul_mont(Up, h[j], temp);
-                fp2mul_mont(Fp, d, temp1);
-                fp2add(temp, temp1, temp1);
-                fp2mul_mont(temp1, g, g);    // g = (M*h + d)*(Up*h + Fp*d)
-                fp2mul_mont(Wp, h[j], temp);
-                fp2add(F, temp, temp);
-                fp2_conj(temp, temp);
-                fp2mul_mont(temp, g, g);     // g = (M*h + d)*(Up*h + Fp*d)*(Wp*h + F)^*
-                fp2mul_mont(Tp, Qj[j]->X, temp);
-                fp2sub(temp, Xp, h[j]);      // h = Tp*X_{Qj} - Xp
-                fp2_conj(h[j], temp);
-                fp2mul_mont(temp, g, g);
-            } else {     
-                fp2mul_mont(M, h[j], temp);
-                fp2add(temp, d, g);             
-            }
-            fp2sqr_mont(n[j], temp);
-            fp2mul_mont(temp, n[j], n[j]);
-            fp2mul_mont(g, n[j], n[j]);
-        }
-        fp2copy(Xp, X);
-        fp2copy(Yp, Y);
-        fp2copy(Zp, Z);
-        fp2copy(Tp, T);        
-    }
-    
-    // Final exponentiation:
-    mont_n_way_inv(n, t_points, h); 
-    for (int j = 0; j < t_points; j++) {
-        fp2correction(Qj[j]->Z);        
-        if (is_felm_zero(Qj[j]->Z[0]) && is_felm_zero(Qj[j]->Z[1]))
-            fp2copy(one, n[j]);
-        else {
-            final_exponentiation_3_torsion(n[j], h[j], n[j]);
-        }
-    }    
-}
-
-
 void Tate3_pairings(point_full_proj_t *Qj, f2elm_t* f)
 {
     felm_t *x, *y, *l1, *l2, *n1, *n2, *x2, *x23, *x2p3;
diff --git a/src/compression/sidh_compressed.c b/src/compression/sidh_compressed.c
index 17a4b6c..e3357d6 100644
--- a/src/compression/sidh_compressed.c
+++ b/src/compression/sidh_compressed.c
@@ -7,8 +7,7 @@
 #include "../random/random.h"
 #include <string.h>
 
-#define COMPRESSION 0
-#define DECOMPRESSION 1
+
 
 
 static void init_basis(digit_t *gen, f2elm_t XP, f2elm_t XQ, f2elm_t XR)
@@ -51,472 +50,6 @@ void random_mod_order_B(unsigned char* random_digits)
 }
 
 
-static void Ladder3pt_dual(const point_proj_t *Rs, const digit_t* m, const unsigned int AliceOrBob, point_proj_t R, const f2elm_t A24)
-{ // Project 3-point ladder
-    point_proj_t R0 = {0}, R2 = {0};
-    digit_t mask;
-    int i, nbits, bit, swap, prevbit = 0;
-
-    if (AliceOrBob == ALICE) {
-        nbits = OALICE_BITS;
-    } else {
-        nbits = OBOB_BITS;
-    }
-
-    fp2copy(Rs[1]->X, R0->X);
-    fp2copy(Rs[1]->Z, R0->Z);
-    fp2copy(Rs[2]->X, R2->X);
-    fp2copy(Rs[2]->Z, R2->Z);
-    fp2copy(Rs[0]->X, R->X);
-    fp2copy(Rs[0]->Z, R->Z);
-
-    // Main loop
-    for (i = 0; i < nbits; i++) {
-        bit = (m[i >> LOG2RADIX] >> (i & (RADIX-1))) & 1;
-        swap = bit ^ prevbit;
-        prevbit = bit;
-        mask = 0 - (digit_t)swap;
-
-        swap_points(R, R2, mask);
-        xDBLADD(R0, R2, R->X, A24);
-        fp2mul_mont(R2->X, R->Z, R2->X);
-    }
-    swap = 0 ^ prevbit;
-    mask = 0 - (digit_t)swap;
-    swap_points(R, R2, mask);
-}
-
-
-static void Elligator2(const f2elm_t a24, const unsigned int r, f2elm_t x, unsigned char *bit, const unsigned char COMPorDEC)
-{ // Generate an x-coordinate of a point on curve with (affine) coefficient a24 
-  // Use the counter r
-    int i;
-    felm_t one_fp, a2, b2, N, temp0, temp1;
-    f2elm_t A, y2, *t_ptr;
-
-    fpcopy((digit_t*)&Montgomery_one, one_fp);
-    fp2add(a24, a24, A);
-    fpsub(A[0], one_fp, A[0]);
-    fp2add(A, A, A);                       // A = 4*a24-2 
-
-    // Elligator computation
-    t_ptr = (f2elm_t *)&v_3_torsion[r];    
-    fp2mul_mont(A, (felm_t*)t_ptr, x);     // x = A*v; v := 1/(1 + U*r^2) table lookup
-    fp2neg(x);                             // x = -A*v;
-    
-    if (COMPorDEC == COMPRESSION) {
-        fp2add(A, x, y2);                      // y2 = x + A
-        fp2mul_mont(y2,  x,  y2);              // y2 = x*(x + A)
-        fpadd(y2[0],  one_fp,  y2[0]);         // y2 = x(x + A) + 1
-        fp2mul_mont(x, y2, y2);                // y2 = x*(x^2 + Ax + 1);
-        fpsqr_mont(y2[0], a2);
-        fpsqr_mont(y2[1], b2);
-        fpadd(a2, b2, N);                      // N := norm(y2);
-
-        fpcopy(N, temp0);
-        for (i = 0; i < OALICE_BITS - 2; i++) {    
-            fpsqr_mont(temp0,  temp0);
-        }
-        for (i = 0; i < OBOB_EXPON; i++) {
-            fpsqr_mont(temp0,  temp1);
-            fpmul_mont(temp0,  temp1,  temp0);
-        }
-        fpsqr_mont(temp0, temp1);              // z = N^((p + 1) div 4);
-        fpcorrection(temp1);
-        fpcorrection(N);
-        if (ct_compare((unsigned char*)temp1, (unsigned char*)N, NBITS_TO_NBYTES(NBITS_FIELD)) != 0) {
-            fp2neg(x);
-            fp2sub(x, A, x);                   // x = -x - A;
-            if (COMPorDEC == COMPRESSION)
-                *bit = 1;        
-        }
-    } else {
-        if (*bit) {
-            fp2neg(x);
-            fp2sub(x,A,x);                              // x = -x - A;
-        }       
-    }
-}
-
-
-static void make_positive(f2elm_t x)
-{
-    unsigned long long nbytes = NBITS_TO_NBYTES(NBITS_FIELD);
-    felm_t zero = {0};
-
-    from_fp2mont(x, x);
-    if (ct_compare((unsigned char*)x[0], (unsigned char*)zero, (size_t)nbytes) != 0) {
-        if ((x[0][0] & 1) == 1)
-            fp2neg(x);
-    } else {
-        if ((x[1][0] & 1) == 1)
-            fp2neg(x);
-    }
-    to_fp2mont(x, x);
-}
-
-
-static void BiQuad_affine(const f2elm_t a24, const f2elm_t x0, const f2elm_t x1, point_proj_t R)
-{
-    f2elm_t Ap2, aa, bb, cc, t0, t1;
-
-    fp2add(a24, a24, Ap2);
-    fp2add(Ap2, Ap2, Ap2);    // Ap2 = a+2 = 4*a24
-
-    fp2sub(x0, x1, aa);
-    fp2sqr_mont(aa, aa);
-
-    fp2mul_mont(x0, x1, cc);
-    fpsub(cc[0], (digit_t*)Montgomery_one, cc[0]);
-    fp2sqr_mont(cc, cc);
-
-    fpsub(x0[0], (digit_t*)Montgomery_one, bb[0]);
-    fpcopy(x0[1], bb[1]);
-    fp2sqr_mont(bb, bb);
-    fp2mul_mont(Ap2, x0, t0);
-    fp2add(bb, t0, bb);
-    fp2mul_mont(x1, bb, bb);
-    fpsub(x1[0], (digit_t*)Montgomery_one, t0[0]);
-    fpcopy(x1[1], t0[1]);
-    fp2sqr_mont(t0, t0);
-    fp2mul_mont(Ap2, x1, t1);
-    fp2add(t0, t1, t0);
-    fp2mul_mont(x0, t0, t0);
-    fp2add(bb, t0, bb);
-    fp2add(bb, bb, bb);
-
-    fp2sqr_mont(bb, t0);
-    fp2mul_mont(aa, cc, t1);
-    fp2add(t1, t1, t1);
-    fp2add(t1, t1, t1);
-    fp2sub(t0, t1, t0);
-    sqrt_Fp2(t0, t0);
-    make_positive(t0);    // Make the sqrt "positive"
-    fp2add(bb, t0, R->X);
-    fp2add(aa, aa, R->Z);
-}
-
-
-static void get_4_isog_dual(const point_proj_t P, f2elm_t A24, f2elm_t C24, f2elm_t* coeff)
-{
-    fp2sub(P->X, P->Z, coeff[1]);
-    fp2add(P->X, P->Z, coeff[2]);
-    fp2sqr_mont(P->Z, coeff[4]);
-    fp2add(coeff[4], coeff[4], coeff[0]);
-    fp2sqr_mont(coeff[0], C24);
-    fp2add(coeff[0], coeff[0], coeff[0]);
-    fp2sqr_mont(P->X, coeff[3]);
-    fp2add(coeff[3], coeff[3], A24);
-    fp2sqr_mont(A24, A24);
-}
-
-#if (OALICE_BITS % 2 == 1)
-
-static void eval_dual_2_isog(const f2elm_t X2, const f2elm_t Z2, point_proj_t P)
-{
-    f2elm_t t0;
-
-    fp2add(P->X, P->Z, t0);
-    fp2sub(P->X, P->Z, P->Z);
-    fp2sqr_mont(t0, t0);
-    fp2sqr_mont(P->Z, P->Z);
-    fp2sub(t0, P->Z, P->Z);
-    fp2mul_mont(X2, P->Z, P->Z);
-    fp2mul_mont(Z2, t0, P->X);
-}
-
-#endif
-
-static void eval_final_dual_2_isog(point_proj_t P)
-{
-    f2elm_t t0, t1;
-    felm_t t2;
-
-    fp2add(P->X, P->Z, t0);
-    fp2mul_mont(P->X, P->Z, t1);
-    fp2sqr_mont(t0, P->X);
-    fpcopy((P->X)[0], t2);
-    fpcopy((P->X)[1], (P->X)[0]);
-    fpcopy(t2, (P->X)[1]);
-    fpneg((P->X)[1]);
-    fp2add(t1, t1, P->Z);
-    fp2add(P->Z, P->Z, P->Z);
-}
-
-
-static void eval_dual_4_isog_shared(const f2elm_t X4pZ4, const f2elm_t X42, const f2elm_t Z42, f2elm_t* coeff)
-{
-    fp2sub(X42, Z42, coeff[0]);
-    fp2add(X42, Z42, coeff[1]);
-    fp2sqr_mont(X4pZ4, coeff[2]);
-    fp2sub(coeff[2], coeff[1], coeff[2]);
-}
-
-
-static void eval_dual_4_isog(const f2elm_t A24, const f2elm_t C24, const f2elm_t* coeff, point_proj_t P)
-{
-    f2elm_t t0, t1, t2, t3;
-
-    fp2add(P->X, P->Z, t0);
-    fp2sub(P->X, P->Z, t1);
-    fp2sqr_mont(t0, t0);
-    fp2sqr_mont(t1, t1);
-    fp2sub(t0, t1, t2);
-    fp2sub(C24, A24, t3);
-    fp2mul_mont(t2, t3, t3);
-    fp2mul_mont(C24, t0, t2);
-    fp2sub(t2, t3, t2);
-    fp2mul_mont(t2, t0, P->X);
-    fp2mul_mont(t3, t1, P->Z);
-    fp2mul_mont(coeff[0], P->X, P->X);
-    fp2mul_mont(coeff[1], P->Z, t0);
-    fp2add(P->X, t0, P->X);
-    fp2mul_mont(coeff[2], P->Z, P->Z);
-}
-
-
-static void eval_full_dual_4_isog(const f2elm_t As[][5], point_proj_t P)
-{
-    // First all 4-isogenies
-    for(unsigned int i = 0; i < MAX_Alice; i++) {
-        eval_dual_4_isog(As[MAX_Alice-i][0], As[MAX_Alice-i][1], *(As+MAX_Alice-i-1)+2, P);
-    }
-#if (OALICE_BITS % 2 == 1)
-    eval_dual_2_isog(As[MAX_Alice][2], As[MAX_Alice][3], P);
-#endif
-    eval_final_dual_2_isog(P);    // to A = 0
-}
-
-
-static void TripleAndParabola_proj(const point_full_proj_t R, f2elm_t l1x, f2elm_t l1z)
-{
-    fp2sqr_mont(R->X, l1z);
-    fp2add(l1z, l1z, l1x);
-    fp2add(l1x, l1z, l1x);
-    fpadd(l1x[0], (digit_t*)&Montgomery_one, l1x[0]);
-    fp2add(R->Y, R->Y, l1z);
-}
-
-
-static void Tate3_proj(const point_full_proj_t P, const point_full_proj_t Q, f2elm_t gX, f2elm_t gZ)
-{
-    f2elm_t t0, l1x;
-
-    TripleAndParabola_proj(P, l1x, gZ);
-    fp2sub(Q->X, P->X, gX);
-    fp2mul_mont(l1x, gX, gX);
-    fp2sub(P->Y, Q->Y, t0);
-    fp2mul_mont(gZ, t0, t0);
-    fp2add(gX, t0, gX);
-}
-
-
-static void FinalExpo3(f2elm_t gX, f2elm_t gZ)
-{
-    unsigned int i;
-    f2elm_t f_;
-
-    fp2copy(gZ, f_);
-    fpneg(f_[1]);
-    fp2mul_mont(gX, f_, f_);
-    fp2inv_mont_bingcd(f_);
-    fpneg(gX[1]);
-    fp2mul_mont(gX,gZ, gX);
-    fp2mul_mont(gX,f_, gX);
-    for(i = 0; i < OALICE_BITS; i++)
-        fp2sqr_mont(gX, gX);
-    for(i = 0; i < OBOB_EXPON-1; i++)
-        cube_Fp2_cycl(gX, (digit_t*)Montgomery_one);
-}
-
-
-static void FinalExpo3_2way(f2elm_t *gX, f2elm_t *gZ)
-{
-    unsigned int i, j;
-    f2elm_t f_[2], finv[2];
-
-    for(i = 0; i < 2; i++) {
-        fp2copy(gZ[i], f_[i]);
-        fpneg(f_[i][1]);    // Conjugate
-        fp2mul_mont(gX[i], f_[i], f_[i]);
-    }
-    mont_n_way_inv(f_,2,finv);
-    for(i = 0; i < 2; i++) {
-        fpneg(gX[i][1]);
-        fp2mul_mont(gX[i], gZ[i], gX[i]);
-        fp2mul_mont(gX[i], finv[i], gX[i]);
-        for(j = 0; j < OALICE_BITS; j++)
-            fp2sqr_mont(gX[i], gX[i]);
-        for(j = 0; j < OBOB_EXPON-1; j++)
-            cube_Fp2_cycl(gX[i], (digit_t*)&Montgomery_one);
-    }
-}
-
-
-static bool FirstPoint_dual(const point_proj_t P, point_full_proj_t R, unsigned char *ind)
-{
-    point_full_proj_t R3,S3;
-    f2elm_t gX[2],gZ[2];
-    felm_t zero = {0};
-    unsigned long long nbytes = NBITS_TO_NBYTES(NBITS_FIELD);
-    unsigned char alpha,beta;
-
-    fpcopy((digit_t*)B_gen_3_tors + 0*NWORDS_FIELD, (R3->X)[0]);
-    fpcopy((digit_t*)B_gen_3_tors + 1*NWORDS_FIELD, (R3->X)[1]);
-    fpcopy((digit_t*)B_gen_3_tors + 2*NWORDS_FIELD, (R3->Y)[0]);
-    fpcopy((digit_t*)B_gen_3_tors + 3*NWORDS_FIELD, (R3->Y)[1]);
-    fpcopy((digit_t*)B_gen_3_tors + 4*NWORDS_FIELD, (S3->X)[0]);
-    fpcopy((digit_t*)B_gen_3_tors + 5*NWORDS_FIELD, (S3->X)[1]);
-    fpcopy((digit_t*)B_gen_3_tors + 6*NWORDS_FIELD, (S3->Y)[0]);
-    fpcopy((digit_t*)B_gen_3_tors + 7*NWORDS_FIELD, (S3->Y)[1]);
-
-    CompletePoint(P,R);
-    Tate3_proj(R3,R,gX[0],gZ[0]);
-    Tate3_proj(S3,R,gX[1],gZ[1]);
-    FinalExpo3_2way(gX,gZ);
-
-    // Do small DLog with respect to g_R3_S3
-    fp2correction(gX[0]);
-    fp2correction(gX[1]);
-    if (ct_compare((unsigned char*)gX[0][1], (unsigned char*)zero, (size_t)nbytes) == 0)    // = 1
-        alpha = 0;
-    else if (ct_compare((unsigned char*)gX[0][1], (unsigned char*)g_R_S_im, (size_t)nbytes) == 0)    // = g_R3_S3
-        alpha = 1;
-    else    // = g_R3_S3^2
-        alpha = 2;
-
-    if (ct_compare((unsigned char*)gX[1][1], (unsigned char*)zero, (size_t)nbytes) == 0)    // = 1
-        beta = 0;
-    else if (ct_compare((unsigned char*)gX[1][1], (unsigned char*)g_R_S_im, (size_t)nbytes) == 0)    // = g_R3_S3
-        beta = 1;
-    else    // = g_R3_S3^2
-        beta = 2;
-
-    if (alpha == 0 && beta == 0)   // Not full order
-        return false;
-
-    // Return the 3-torsion point that R lies above
-    if (alpha == 0)                // Lies above R3
-        *ind = 0;
-    else if (beta == 0)            // Lies above S3
-        *ind = 1;
-    else if (alpha + beta == 3)    // Lies above R3+S3
-        *ind = 3;
-    else                           // Lies above R3-S3
-        *ind = 2;
-
-    return true;
-}
-
-
-static bool SecondPoint_dual(const point_proj_t P, point_full_proj_t R, unsigned char ind)
-{
-    point_full_proj_t RS3;
-    f2elm_t gX, gZ;
-    felm_t zero = {0};
-    unsigned long long nbytes = NBITS_TO_NBYTES(NBITS_FIELD);
-
-    // Pair with 3-torsion point determined by first point
-    fpcopy((digit_t*)B_gen_3_tors + (4*ind + 0)*NWORDS_FIELD, (RS3->X)[0]);
-    fpcopy((digit_t*)B_gen_3_tors + (4*ind + 1)*NWORDS_FIELD, (RS3->X)[1]);
-    fpcopy((digit_t*)B_gen_3_tors + (4*ind + 2)*NWORDS_FIELD, (RS3->Y)[0]);
-    fpcopy((digit_t*)B_gen_3_tors + (4*ind + 3)*NWORDS_FIELD, (RS3->Y)[1]);
-
-    CompletePoint(P, R);
-    Tate3_proj(RS3, R, gX, gZ);
-    FinalExpo3(gX, gZ);
-
-    fp2correction(gX);
-    if (ct_compare((unsigned char*)gX[1], (unsigned char*)zero, (size_t)nbytes) != 0)    // Not equal to 1
-        return true;
-    else
-        return false;
-}
-
-
-static void FirstPoint3n(const f2elm_t a24, const f2elm_t As[][5], f2elm_t x, point_full_proj_t R, unsigned int *r, unsigned char *ind, unsigned char *bitEll)
-{
-    bool b = false;
-    point_proj_t P;
-    felm_t zero = {0};
-    *r = 0;
-
-    while (!b) {        
-        *bitEll = 0;
-        Elligator2(a24, *r, x, bitEll, COMPRESSION);    // Get x-coordinate on curve a24
-
-        fp2copy(x, P->X);
-        fpcopy((digit_t*)&Montgomery_one, (P->Z)[0]);
-        fpcopy(zero, (P->Z)[1]);
-        eval_full_dual_4_isog(As, P);    // Move x over to A = 0
-
-        b = FirstPoint_dual(P, R, ind);  // Compute DLog with 3-torsion points
-        *r = *r + 1;
-    }
-}
-
-
-static void SecondPoint3n(const f2elm_t a24, const f2elm_t As[][5], f2elm_t x, point_full_proj_t R, unsigned int *r, unsigned char ind, unsigned char *bitEll)
-{
-    bool b = false;
-    point_proj_t P;
-    felm_t zero = {0};
-
-    while (!b) {
-        *bitEll = 0;
-        Elligator2(a24, *r, x, bitEll, COMPRESSION);
-
-        fp2copy(x, P->X);
-        fpcopy((digit_t*)&Montgomery_one, (P->Z)[0]);
-        fpcopy(zero, (P->Z)[1]);
-        eval_full_dual_4_isog(As, P);    // Move x over to A = 0
-
-        b = SecondPoint_dual(P, R, ind);
-        *r = *r + 1;
-    }
-}
-
-
-static void makeDiff(const point_full_proj_t R, point_full_proj_t S, const point_proj_t D)
-{
-    f2elm_t t0, t1, t2;
-    unsigned long long nbytes = NBITS_TO_NBYTES(NBITS_FIELD);
-
-    fp2sub(R->X, S->X, t0);
-    fp2sub(R->Y, S->Y, t1);
-    fp2sqr_mont(t0, t0);
-    fp2sqr_mont(t1, t1);
-    fp2add(R->X, S->X, t2);
-    fp2mul_mont(t0, t2, t2);
-    fp2sub(t1, t2, t1);
-    fp2mul_mont(D->Z, t1, t1);
-    fp2mul_mont(D->X, t0, t0);
-    fp2correction(t0);
-    fp2correction(t1);
-    if (ct_compare((unsigned char*)t0[0], (unsigned char*)t1[0], (size_t)nbytes) == 0 && ct_compare((unsigned char*)t0[1], (unsigned char*)t1[1], (size_t)nbytes) == 0)
-        fp2neg(S->Y);
-}
-
-
-static void BuildOrdinary3nBasis_dual(const f2elm_t a24, const f2elm_t As[][5], point_full_proj_t *R, unsigned int *r, unsigned int *bitsEll)
-{
-    point_proj_t D;
-    f2elm_t xs[2];
-    unsigned char ind, bit;
-
-    FirstPoint3n(a24, As, xs[0], R[0], r, &ind, &bit);
-    *bitsEll = (unsigned int)bit;
-    *(r+1) = *r;
-    SecondPoint3n(a24, As, xs[1], R[1], r+1, ind, &bit);
-    *bitsEll |= ((unsigned int)bit << 1);
-
-    // Get x-coordinate of difference
-    BiQuad_affine(a24, xs[0], xs[1], D);
-    eval_full_dual_4_isog(As, D);    // Move x over to A = 0
-    makeDiff(R[0], R[1], D);
-}
-
-
 static void FullIsogeny_A_dual(unsigned char* PrivateKeyA, f2elm_t As[][5], f2elm_t a24, unsigned int sike)
 {
   // Input:  a private key PrivateKeyA in the range [0, 2^eA - 1]. 
@@ -604,21 +137,6 @@ static void Dlogs3_dual(const f2elm_t *f, int *D, digit_t *d0, digit_t *c0, digi
 }
 
 
-static void BuildOrdinary3nBasis_Decomp_dual(const f2elm_t A24, point_proj_t *Rs, unsigned char *r, const unsigned char bitsEll)
-{
-    unsigned char bitEll[2];
-    
-    bitEll[0] = bitsEll & 0x1;
-    bitEll[1] = (bitsEll >> 1) & 0x1;    
-    
-    // Elligator2 both x-coordinates
-    Elligator2(A24, (unsigned int)r[0]-1, Rs[0]->X, &bitEll[0], DECOMPRESSION);
-    Elligator2(A24, (unsigned int)r[1]-1, Rs[1]->X, &bitEll[1], DECOMPRESSION);
-    // Get x-coordinate of difference
-    BiQuad_affine(A24, Rs[0]->X, Rs[1]->X, Rs[2]);
-}
-
-
 static void PKADecompression_dual(const unsigned char* SecretKeyB, const unsigned char* CompressedPKA, point_proj_t R, f2elm_t A)
 {
     unsigned char bit, rs[3];
@@ -820,120 +338,6 @@ int EphemeralSecretAgreement_B(const unsigned char* PrivateKeyB, const unsigned
 }
 
 
-static void BuildEntangledXonly(const f2elm_t A, point_proj_t *R, unsigned char *qnr, unsigned char *ind)
-{
-    felm_t s;
-    f2elm_t *t_ptr, r, t;
-
-    // Select the correct table
-    if (is_sqr_fp2(A,  s)) {
-        t_ptr = (f2elm_t *)table_v_qnr; 
-        *qnr = 1;
-    } else {
-        t_ptr = (f2elm_t *)table_v_qr;
-        *qnr = 0;
-    }
-
-    // Get x0
-    *ind = 0;
-    do {
-        fp2mul_mont(A,  (felm_t *)*t_ptr++,  R[0]->X);    // R[0]->X =  A*v
-        fp2neg(R[0]->X);                                  // R[0]->X = -A*v
-        fp2add(R[0]->X,  A,  t);
-        fp2mul_mont(R[0]->X,  t,  t);
-        fpadd(t[0],  (digit_t*)Montgomery_one,  t[0]);
-        fp2mul_mont(R[0]->X,  t,  t);                     // t = R[0]->X^3 + A*R[0]->X^2 + R[0]->X
-        *ind += 1;
-    } while (!is_sqr_fp2(t,  s));
-    *ind -= 1;
-
-    if (*qnr)
-        fpcopy((digit_t*)table_r_qnr[*ind], r[0]);
-    else
-        fpcopy((digit_t*)table_r_qr[*ind], r[0]);
-
-    // Get x1
-    fp2add(R[0]->X, A, R[1]->X);
-    fp2neg(R[1]->X);    // R[1]->X = -R[0]->X-A
-
-    // Get difference x2,  z2
-    fp2sub(R[0]->X, R[1]->X, R[2]->Z);
-    fp2sqr_mont(R[2]->Z, R[2]->Z);
-
-    fpcopy(r[0], r[1]);    // (1+i)*ind
-    fpadd((digit_t*)Montgomery_one, r[0], r[0]);
-    fp2sqr_mont(r, r);
-    fp2mul_mont(t, r, R[2]->X);
-}
-
-
-static void RecoverY(const f2elm_t A, const point_proj_t *xs, point_full_proj_t *Rs)
-{
-    f2elm_t t0, t1, t2, t3, t4;
-
-    fp2mul_mont(xs[2]->X, xs[1]->Z, t0);
-    fp2mul_mont(xs[1]->X, xs[2]->Z, t1);
-    fp2mul_mont(xs[1]->X, xs[2]->X, t2);
-    fp2mul_mont(xs[1]->Z, xs[2]->Z, t3);
-    fp2sqr_mont(xs[1]->X, t4);
-    fp2sqr_mont(xs[1]->Z, Rs[1]->X);
-    fp2sub(t2, t3, Rs[1]->Y);
-    fp2mul_mont(xs[1]->X, Rs[1]->Y, Rs[1]->Y);
-    fp2add(t4, Rs[1]->X, t4);
-    fp2mul_mont(xs[2]->Z, t4, t4);
-    fp2mul_mont(A, t1, Rs[1]->X);
-    fp2sub(t0, t1, Rs[1]->Z);
-
-    fp2mul_mont(Rs[0]->X, Rs[1]->Z, t0);
-    fp2add(t2, Rs[1]->X, t1);
-    fp2add(t1, t1, t1);
-    fp2sub(t0, t1, t0);
-    fp2mul_mont(xs[1]->Z, t0, t0);
-    fp2sub(t0, t4, t0);
-    fp2mul_mont(Rs[0]->X, t0, t0);
-    fp2add(t0, Rs[1]->Y, Rs[1]->Y);
-    fp2mul_mont(Rs[0]->Y, t3, t0);
-    fp2mul_mont(xs[1]->X, t0, Rs[1]->X);
-    fp2add(Rs[1]->X, Rs[1]->X, Rs[1]->X);
-    fp2mul_mont(xs[1]->Z, t0, Rs[1]->Z);
-    fp2add(Rs[1]->Z, Rs[1]->Z, Rs[1]->Z);
-
-    fp2inv_mont_bingcd(Rs[1]->Z);
-    fp2mul_mont(Rs[1]->X, Rs[1]->Z, Rs[1]->X);
-    fp2mul_mont(Rs[1]->Y, Rs[1]->Z, Rs[1]->Y);
-}
-
-
-static void BuildOrdinary2nBasis_dual(const f2elm_t A, const f2elm_t Ds[][2], point_full_proj_t *Rs, unsigned char *qnr, unsigned char *ind)
-{
-    unsigned int i;
-    felm_t t0;
-    f2elm_t A6 = {0};
-    point_proj_t xs[3] = {0};
-
-    // Generate x-only entangled basis 
-    BuildEntangledXonly(A, xs, qnr, ind);
-    fpcopy((digit_t*)Montgomery_one, (xs[0]->Z)[0]);
-    fpcopy((digit_t*)Montgomery_one, (xs[1]->Z)[0]);
-
-    // Move them back to A = 6 
-    for(i = 0; i < MAX_Bob; i++) {
-        eval_3_isog(xs[0], Ds[MAX_Bob-1-i]);
-        eval_3_isog(xs[1], Ds[MAX_Bob-1-i]);
-        eval_3_isog(xs[2], Ds[MAX_Bob-1-i]);
-    }
-
-    // Recover y-coordinates with a single sqrt on A = 6
-    fpcopy((digit_t*)Montgomery_one, A6[0]);
-    fpadd(A6[0], A6[0], t0);
-    fpadd(t0, t0, A6[0]);
-    fpadd(A6[0], t0, A6[0]);    // A6 = 6 
-
-    CompleteMPoint(A6, xs[0], Rs[0]);
-    RecoverY(A6, xs, Rs);
-}
-
-
 static void FullIsogeny_B_dual(const unsigned char* PrivateKeyB, f2elm_t Ds[][2], f2elm_t A)
 { // Bob's ephemeral public key generation
   // Input:  a private key PrivateKeyB in the range [0, 2^Floor(Log(2,oB)) - 1]. 
@@ -1008,46 +412,6 @@ static void Dlogs2_dual(const f2elm_t *f, int *D, digit_t *d0, digit_t *c0, digi
 }
 
 
-static void BuildEntangledXonly_Decomp(const f2elm_t A, point_proj_t *R, unsigned char qnr, unsigned char ind)
-{
-    f2elm_t *t_ptr, r, t;
-    
-    // Select the correct table
-    if ( qnr == 1 )
-        t_ptr = (f2elm_t *)table_v_qnr; 
-    else
-        t_ptr = (f2elm_t *)table_v_qr;
-
-    if (ind >= TABLE_V_LEN/2)
-        ind = 0;
-    // Get x0     
-    fp2mul_mont(A, t_ptr[ind], R[0]->X);    // x1 =  A*v
-    fp2neg(R[0]->X);                        // R[0]->X = -A*v
-    fp2add(R[0]->X, A, t);
-    fp2mul_mont(R[0]->X, t, t);
-    fpadd(t[0], (digit_t*)Montgomery_one, t[0]);
-    fp2mul_mont(R[0]->X, t, t);             // t = R[0]->X^3 + A*R[0]->X^2 + R[0]->X
-
-    if (qnr == 1)
-        fpcopy((digit_t*)table_r_qnr[ind], r[0]);
-    else
-        fpcopy((digit_t*)table_r_qr[ind], r[0]);
-
-    // Get x1 
-    fp2add(R[0]->X, A, R[1]->X);
-    fp2neg(R[1]->X);    // R[1]->X = -R[0]->X-A
-
-    // Get difference x2,z2 
-    fp2sub(R[0]->X, R[1]->X, R[2]->Z);
-    fp2sqr_mont(R[2]->Z, R[2]->Z);
-
-    fpcopy(r[0],r[1]); // (1+i)*ind 
-    fpadd((digit_t*)Montgomery_one, r[0], r[0]);
-    fp2sqr_mont(r, r);
-    fp2mul_mont(t, r, R[2]->X);
-}
-
-
 static void PKBDecompression_extended(const unsigned char* SecretKeyA, const unsigned char* CompressedPKB, point_proj_t R, f2elm_t A, unsigned char* tphiBKA_t)
 { // Bob's PK decompression -- SIKE protocol
     uint64_t mask = (digit_t)(-1);
@@ -1377,20 +741,19 @@ int8_t validate_ciphertext(const unsigned char* ephemeralsk_, const unsigned cha
 { // If ct validation passes returns 0, otherwise returns -1.
     point_proj_t phis[3] = {0}, R, S, pts[MAX_INT_POINTS_BOB];
     f2elm_t XPB, XQB, XRB, coeff[3], A24plus = {0}, A24minus = {0}, A = {0}, comp1 = {0}, comp2 = {0}, one = {0};
+    digit_t temp[NWORDS_ORDER] = {0}, sk[NWORDS_ORDER] = {0};
     unsigned int i, row, m, index = 0, pts_index[MAX_INT_POINTS_BOB], npts = 0, ii = 0;
-    digit_t temp[NWORDS_ORDER] = {0};
-    digit_t sk[NWORDS_ORDER] = {0};    
-                
+                    
     fpcopy((digit_t*)&Montgomery_one, one[0]);    
     
     // Initialize basis points
     init_basis((digit_t*)B_gen, XPB, XQB, XRB);
 
     fp2_decode(xKA, phis[0]->X);
-    fpcopy((digit_t*)&Montgomery_one, phis[0]->Z[0]); // phi[0] <- PA + skA*QA    
+    fpcopy(one[0], phis[0]->Z[0]); // phis[0] <- PA + skA*QA    
 
     // Initialize constants: A24minus = A-2C, A24plus = A+2C, where A=6, C=1
-    fpcopy((digit_t*)&Montgomery_one, A24plus[0]);
+    fpcopy(one[0], A24plus[0]);
     fp2add(A24plus, A24plus, A24plus);
     fp2add(A24plus, A24plus, A24minus);  // A24minus = 4
     fp2add(A24plus, A24minus, A);        // A = 6
@@ -1432,7 +795,7 @@ int8_t validate_ciphertext(const unsigned char* ephemeralsk_, const unsigned cha
     fp2_decode(tphiBKA_t, S->X);
     fp2_decode(&tphiBKA_t[FP2_ENCODED_BYTES], S->Z);  // Recover t*3^n*((a0+skA*a1)*S1 + (b0+skA*b1)*S2)    
     decode_to_digits(&tphiBKA_t[2*FP2_ENCODED_BYTES], temp, ORDER_A_ENCODED_BYTES, NWORDS_ORDER);
-    Ladder(phis[0], temp, A, OALICE_BITS, R);         // t*(phiP + skA*phiQ)
+    Ladder(phis[0], temp, A, OALICE_BITS, R); // R <- t*(phiP + skA*phiQ), t in {(a0+skA*a1)^-1, (b0+skA*b1)^-1}
         
     fp2mul_mont(R->X, S->Z, comp1);
     fp2mul_mont(R->Z, S->X, comp2);             
diff --git a/src/compression/torsion_basis.c b/src/compression/torsion_basis.c
index c39d9d7..af54ef8 100644
--- a/src/compression/torsion_basis.c
+++ b/src/compression/torsion_basis.c
@@ -5,283 +5,479 @@
 *********************************************************************************************/
 
 
-void get_2_torsion_entangled_basis_compression(const f2elm_t A, point_t S1, point_t S2, unsigned char *bit, unsigned char *entry) 
-{ // Build an entangled basis for E[2^m].
-  // At first glance this is similar to the Elligator 2 technique, but the field element u is a *square* here, not a non-square.
-    unsigned int i, index, isSqrA = 0;
-    felm_t r = {0}, s, z, alpha, twoalphainv, beta;
-    f2elm_t t, u, u0, one = {0}, *t_ptr;
-    felm_t *x1 = (felm_t*) S1->x, *y1 = (felm_t*) S1->y, *x2 = (felm_t*) S2->x, *y2 = (felm_t*) S2->y;
-     
-    fpcopy((digit_t*)&Montgomery_one, one[0]);    
-    copy_words((const digit_t *)u0_entang, u0[0], 2*NWORDS_FIELD);
-    copy_words((const digit_t *)u_entang, u[0], 2*NWORDS_FIELD);
+#define COMPRESSION 0
+#define DECOMPRESSION 1
 
-    // Select the correct table
-    if (is_sqr_fp2(A, s)) {
-        t_ptr = (f2elm_t *)table_v_qnr; 
-        isSqrA = 1;
-    } else {
-        t_ptr = (f2elm_t *)table_v_qr;
-    }
-    *bit = (unsigned char)isSqrA;  // This bit is transmitted along with the PK to speedup decompression
 
-    index = 0;
-    do {
-        fp2mul_mont(A, (felm_t *)*t_ptr++, x1); // x1 =  A*v
-        fp2neg(x1);                             // x1 = -A*v
-        fp2add(x1, A, t);
-        fp2mul_mont(x1, t, t);
-        fpadd(t[0], one[0], t[0]);
-        fp2mul_mont(x1, t, t);                  // t = x1^3 + A*x1^2 + x1 = x1(x1(x1 + A) + 1)
-        index += 2;
-    } while (!is_sqr_fp2(t, s));
-    *entry = ((unsigned char)index - 2)/2;  // This table entry will also be transmitted along with the PubKey to speedup decompression
-    
-    if (isSqrA)
-        copy_words((const digit_t *)table_r_qnr[(index-2)/2], r, NWORDS_FIELD);
-    else
-        copy_words((const digit_t *)table_r_qr[(index-2)/2], r, NWORDS_FIELD);
+static void Elligator2(const f2elm_t a24, const unsigned int r, f2elm_t x, unsigned char *bit, const unsigned char COMPorDEC)
+{ // Generate an x-coordinate of a point on curve with (affine) coefficient a24 
+  // Use the counter r
+    int i;
+    felm_t one_fp, a2, b2, N, temp0, temp1;
+    f2elm_t A, y2, *t_ptr;
+
+    fpcopy((digit_t*)&Montgomery_one, one_fp);
+    fp2add(a24, a24, A);
+    fpsub(A[0], one_fp, A[0]);
+    fp2add(A, A, A);                       // A = 4*a24-2 
+
+    // Elligator computation
+    t_ptr = (f2elm_t *)&v_3_torsion[r];    
+    fp2mul_mont(A, (felm_t*)t_ptr, x);     // x = A*v; v := 1/(1 + U*r^2) table lookup
+    fp2neg(x);                             // x = -A*v;
     
-    // Finish sqrt computation for y1 = sqrt(x1^3+A*x1^2+x1)
-    fpadd(t[0],s,z);
-    fpdiv2(z,z);
-    fpcopy(z,alpha);
-    for (i = 0; i < OALICE_BITS - 2; i++) {         
-        fpsqr_mont(alpha, alpha);
-    }
-    for (i = 0; i < OBOB_EXPON; i++) {
-        fpsqr_mont(alpha, s);                                         
-        fpmul_mont(alpha, s, alpha);     // alpha = z^((p+1)/4)                                 
-    }
+    if (COMPorDEC == COMPRESSION) {
+        fp2add(A, x, y2);                      // y2 = x + A
+        fp2mul_mont(y2,  x,  y2);              // y2 = x*(x + A)
+        fpadd(y2[0],  one_fp,  y2[0]);         // y2 = x(x + A) + 1
+        fp2mul_mont(x, y2, y2);                // y2 = x*(x^2 + Ax + 1);
+        fpsqr_mont(y2[0], a2);
+        fpsqr_mont(y2[1], b2);
+        fpadd(a2, b2, N);                      // N := norm(y2);
 
-    fpadd(alpha,alpha,twoalphainv); 
-    fpinv_mont_bingcd(twoalphainv);   
-    fpmul_mont(t[1],twoalphainv,beta);
-    fpsqr_mont(alpha, twoalphainv);
-    fpcorrection(twoalphainv);
-    fpcorrection(z);   
-    if (ct_compare((unsigned char*)twoalphainv, (unsigned char*)z, NBITS_TO_NBYTES(NBITS_FIELD)) == 0) {
-        fpcopy(alpha,y1[0]);
-        fpcopy(beta,y1[1]);
+        fpcopy(N, temp0);
+        for (i = 0; i < OALICE_BITS - 2; i++) {    
+            fpsqr_mont(temp0,  temp0);
+        }
+        for (i = 0; i < OBOB_EXPON; i++) {
+            fpsqr_mont(temp0,  temp1);
+            fpmul_mont(temp0,  temp1,  temp0);
+        }
+        fpsqr_mont(temp0, temp1);              // z = N^((p + 1) div 4);
+        fpcorrection(temp1);
+        fpcorrection(N);
+        if (memcmp(temp1, N, NBITS_TO_NBYTES(NBITS_FIELD)) != 0) {
+            fp2neg(x);
+            fp2sub(x, A, x);                   // x = -x - A;
+            if (COMPorDEC == COMPRESSION)
+                *bit = 1;        
+        }
     } else {
-        fpneg(beta);
-        fpcopy(beta,y1[0]);
-        fpneg(alpha);        
-        fpcopy(alpha,y1[1]);
-    }   
-    fp2add(x1, A, x2);
-    fp2neg(x2);                  // x2 = A*v - A
-    fp2mul_mont(u0,y1,y2);   
-    fpmul_mont(r,y2[0],y2[0]);
-    fpmul_mont(r,y2[1],y2[1]);   // y2 = u0*r*y1
+        if (*bit) {
+            fp2neg(x);
+            fp2sub(x,A,x);                              // x = -x - A;
+        }       
+    }
 }
 
 
-void get_2_torsion_entangled_basis_decompression(const f2elm_t A, point_t S1, point_t S2, unsigned char isASqr, unsigned char entry) 
-{ // Build an entangled basis for E[2^m] during decompression using the the entry and table already computed during compression
-    unsigned int i;
-    felm_t r = {0}, s, z, alpha, twoalphainv, beta;
-    f2elm_t t, u, u0, one = {0}, *t_ptr;
-    felm_t *x1 = (felm_t*) S1->x, *y1 = (felm_t*) S1->y, *x2 = (felm_t*) S2->x, *y2 = (felm_t*) S2->y;
-     
-    fpcopy((digit_t*)&Montgomery_one, one[0]);
-    
-    copy_words((const digit_t *)u0_entang, u0[0], 2*NWORDS_FIELD);
-    copy_words((const digit_t *)u_entang, u[0], 2*NWORDS_FIELD);
-
-    // Select the table
-    t_ptr = isASqr ? (f2elm_t *)table_v_qnr : (f2elm_t *)table_v_qr;
-
-    fp2mul_mont(A, t_ptr[entry], x1); // x1 =  A*v
-    fp2neg(x1);                       // x1 = -A*v
-    fp2add(x1, A, t);
-    fp2mul_mont(x1, t, t);
-    fpadd(t[0], one[0], t[0]);
-    fp2mul_mont(x1, t, t);            // t = x1^3 + A*x1^2 + x1 = x1(x1(x1 + A) + 1)
-    is_sqr_fp2(t, s);
-    
-    if (isASqr)
-        copy_words((const digit_t *)table_r_qnr[entry], r, NWORDS_FIELD);
-    else
-        copy_words((const digit_t *)table_r_qr[entry], r, NWORDS_FIELD);
-    
-    // Finish sqrt computation for y1 = sqrt(x1^3+A*x1^2+x1)
-    fpadd(t[0],s,z);
-    fpdiv2(z,z);
-    fpcopy(z,alpha);
-    for (i = 0; i < OALICE_BITS - 2; i++) {         
-        fpsqr_mont(alpha, alpha);
+static void TripleAndParabola_proj(const point_full_proj_t R, f2elm_t l1x, f2elm_t l1z)
+{
+    fp2sqr_mont(R->X, l1z);
+    fp2add(l1z, l1z, l1x);
+    fp2add(l1x, l1z, l1x);
+    fpadd(l1x[0], (digit_t*)&Montgomery_one, l1x[0]);
+    fp2add(R->Y, R->Y, l1z);
+}
+
+
+static void Tate3_proj(const point_full_proj_t P, const point_full_proj_t Q, f2elm_t gX, f2elm_t gZ)
+{
+    f2elm_t t0, l1x;
+
+    TripleAndParabola_proj(P, l1x, gZ);
+    fp2sub(Q->X, P->X, gX);
+    fp2mul_mont(l1x, gX, gX);
+    fp2sub(P->Y, Q->Y, t0);
+    fp2mul_mont(gZ, t0, t0);
+    fp2add(gX, t0, gX);
+}
+
+
+static void FinalExpo3_2way(f2elm_t *gX, f2elm_t *gZ)
+{
+    unsigned int i, j;
+    f2elm_t f_[2], finv[2];
+
+    for(i = 0; i < 2; i++) {
+        fp2copy(gZ[i], f_[i]);
+        fpneg(f_[i][1]);    // Conjugate
+        fp2mul_mont(gX[i], f_[i], f_[i]);
     }
-    for (i = 0; i < OBOB_EXPON; i++) {
-        fpsqr_mont(alpha, s);                                         
-        fpmul_mont(alpha, s, alpha);     // alpha = z^((p+1)/4)                                 
+    mont_n_way_inv(f_,2,finv);
+    for(i = 0; i < 2; i++) {
+        fpneg(gX[i][1]);
+        fp2mul_mont(gX[i], gZ[i], gX[i]);
+        fp2mul_mont(gX[i], finv[i], gX[i]);
+        for(j = 0; j < OALICE_BITS; j++)
+            fp2sqr_mont(gX[i], gX[i]);
+        for(j = 0; j < OBOB_EXPON-1; j++)
+            cube_Fp2_cycl(gX[i], (digit_t*)&Montgomery_one);
     }
+}
+
+
+static void FinalExpo3(f2elm_t gX, f2elm_t gZ)
+{
+    unsigned int i;
+    f2elm_t f_;
+
+    fp2copy(gZ, f_);
+    fpneg(f_[1]);
+    fp2mul_mont(gX, f_, f_);
+    fp2inv_mont_bingcd(f_);
+    fpneg(gX[1]);
+    fp2mul_mont(gX,gZ, gX);
+    fp2mul_mont(gX,f_, gX);
+    for(i = 0; i < OALICE_BITS; i++)
+        fp2sqr_mont(gX, gX);
+    for(i = 0; i < OBOB_EXPON-1; i++)
+        cube_Fp2_cycl(gX, (digit_t*)Montgomery_one);
+}
+
+
+static void make_positive(f2elm_t x)
+{
+    unsigned long long nbytes = NBITS_TO_NBYTES(NBITS_FIELD);
+    felm_t zero = {0};
 
-    fpadd(alpha,alpha,twoalphainv);   
-    fpinv_mont_bingcd(twoalphainv);   
-    fpmul_mont(t[1],twoalphainv,beta);
-    fpsqr_mont(alpha, twoalphainv);
-    fpcorrection(twoalphainv);
-    fpcorrection(z);
-    if (ct_compare((unsigned char*)twoalphainv, (unsigned char*)z, NBITS_TO_NBYTES(NBITS_FIELD)) == 0) {
-        fpcopy(alpha,y1[0]);
-        fpcopy(beta,y1[1]);
+    from_fp2mont(x, x);
+    if (memcmp(x[0], zero, (size_t)nbytes) != 0) {
+        if ((x[0][0] & 1) == 1)
+            fp2neg(x);
     } else {
-        fpneg(beta);
-        fpcopy(beta,y1[0]);
-        fpneg(alpha);        
-        fpcopy(alpha,y1[1]);
+        if ((x[1][0] & 1) == 1)
+            fp2neg(x);
     }
-    fp2add(x1, A, x2);
-    fp2neg(x2);                  // x2 = A*v - A
-    fp2mul_mont(u0,y1,y2);   
-    fpmul_mont(r,y2[0],y2[0]);
-    fpmul_mont(r,y2[1],y2[1]);   // y2 = u0*r*y1
+    to_fp2mont(x, x);
 }
 
 
-void BasePoint3n(f2elm_t A, unsigned int *r, point_proj_t P, point_proj_t Q)
-{ // xz-only construction of a point of order 3^n in the Montgomery curve y^2 = x^3 + A*x^2 + x from base counter r.
-  // This is essentially the Elligator 2 technique coupled with cofactor multiplication and LI checking.
-    int i;
-    felm_t a2, b2, N, temp0, temp1;
-    f2elm_t A2, A24, two = {0}, x, y2, one_fp2 = {0}, *t_ptr;
-    point_proj_t S;
-    
-    fpcopy((digit_t*)&Montgomery_one, one_fp2[0]);
-    fp2div2(A,A2);
-    fpcopy(one_fp2[0],two[0]);
-    fpadd(two[0], two[0], two[0]);
-    fp2add(A,two,A24);
-    fp2div2(A24,A24);
-    fp2div2(A24,A24);
-
-    t_ptr = (f2elm_t *)&v_3_torsion[*r];    
-    do {
-        *r += 1;        
-        fp2mul_mont(A,(felm_t*)t_ptr++,x); // x = A*v; v := 1/(1 + U*r^2) table lookup
-        fp2neg(x);                         // x = -A*v;
-        fp2add(A,x,y2);                    // y2 = x + A
-        fp2mul_mont(y2, x, y2);            // y2 = x*(x + A)
-        fpadd(y2[0], one_fp2[0], y2[0]);   // y2 = x(x + A) + 1
-        fp2mul_mont(x,y2,y2);              // y2 = x*(x^2 + Ax + 1);
-        fpsqr_mont(y2[0],a2);
-        fpsqr_mont(y2[1],b2);
-        fpadd(a2,b2,N);                    // N := norm(y2);
-
-        fpcopy(N,temp0);
-        for (i = 0; i < OALICE_BITS - 2; i++) {    
-            fpsqr_mont(temp0, temp0);
-        }
-        for (i = 0; i < OBOB_EXPON; i++) {
-            fpsqr_mont(temp0, temp1);
-            fpmul_mont(temp0, temp1, temp0);
-        }
-        fpsqr_mont(temp0,temp1);  // z = N^((p + 1) div 4);
-        fpcorrection(temp1);
-        fpcorrection(N);
-        if (ct_compare((unsigned char*)temp1,(unsigned char*)N,NBITS_TO_NBYTES(NBITS_FIELD)) != 0) {
-            fp2neg(x);
-            fp2sub(x,A,x);        // x = -x - A;
-        }
-        fp2copy(x,S->X);
-        fp2copy(one_fp2,S->Z);
-        Double(S,P,A24,OALICE_BITS);   // x, z := Double(A24, x, 1, eA);
-        xTPLe_fast(P,Q,A2,OBOB_EXPON-1);  // t, w := Triple(A_2, x, z, eB-1);
-        fp2correction(Q->X);
-        fp2correction(Q->Z);
-    }   while (is_felm_zero(Q->Z[0]) && is_felm_zero(Q->Z[1]));
+static bool FirstPoint_dual(const point_proj_t P, point_full_proj_t R, unsigned char *ind)
+{
+    point_full_proj_t R3,S3;
+    f2elm_t gX[2],gZ[2];
+    felm_t zero = {0};
+    unsigned long long nbytes = NBITS_TO_NBYTES(NBITS_FIELD);
+    unsigned char alpha,beta;
+
+    fpcopy((digit_t*)B_gen_3_tors + 0*NWORDS_FIELD, (R3->X)[0]);
+    fpcopy((digit_t*)B_gen_3_tors + 1*NWORDS_FIELD, (R3->X)[1]);
+    fpcopy((digit_t*)B_gen_3_tors + 2*NWORDS_FIELD, (R3->Y)[0]);
+    fpcopy((digit_t*)B_gen_3_tors + 3*NWORDS_FIELD, (R3->Y)[1]);
+    fpcopy((digit_t*)B_gen_3_tors + 4*NWORDS_FIELD, (S3->X)[0]);
+    fpcopy((digit_t*)B_gen_3_tors + 5*NWORDS_FIELD, (S3->X)[1]);
+    fpcopy((digit_t*)B_gen_3_tors + 6*NWORDS_FIELD, (S3->Y)[0]);
+    fpcopy((digit_t*)B_gen_3_tors + 7*NWORDS_FIELD, (S3->Y)[1]);
+
+    CompletePoint(P,R);
+    Tate3_proj(R3,R,gX[0],gZ[0]);
+    Tate3_proj(S3,R,gX[1],gZ[1]);
+    FinalExpo3_2way(gX,gZ);
+
+    // Do small DLog with respect to g_R3_S3
+    fp2correction(gX[0]);
+    fp2correction(gX[1]);
+    if (memcmp(gX[0][1], zero, (size_t)nbytes) == 0)    // = 1
+        alpha = 0;
+    else if (memcmp(gX[0][1], g_R_S_im, (size_t)nbytes) == 0)    // = g_R3_S3
+        alpha = 1;
+    else    // = g_R3_S3^2
+        alpha = 2;
+
+    if (memcmp(gX[1][1], zero, (size_t)nbytes) == 0)    // = 1
+        beta = 0;
+    else if (memcmp(gX[1][1], g_R_S_im, (size_t)nbytes) == 0)    // = g_R3_S3
+        beta = 1;
+    else    // = g_R3_S3^2
+        beta = 2;
+
+    if (alpha == 0 && beta == 0)   // Not full order
+        return false;
+
+    // Return the 3-torsion point that R lies above
+    if (alpha == 0)                // Lies above R3
+        *ind = 0;
+    else if (beta == 0)            // Lies above S3
+        *ind = 1;
+    else if (alpha + beta == 3)    // Lies above R3+S3
+        *ind = 3;
+    else                           // Lies above R3-S3
+        *ind = 2;
+
+    return true;
 }
 
 
-void BasePoint3n_decompression(f2elm_t A, const unsigned char r, point_proj_t P)
-{ // Deterministic xz-only construction of a point of order 3^n in the Montgomery curve y^2 = x^3 + A*x^2 + x from counter r1.
-  // Notice that the Elligator 2 counter r was generated beforehand during key compression without linear independence testing
-    int i;
-    felm_t a2, b2, N, temp0, temp1;
-    f2elm_t A2, A24, two = {0}, x, y2, one_fp2 = {0};
-    point_proj_t S;
-    
-    fpcopy((digit_t*)&Montgomery_one, one_fp2[0]);
-    fp2div2(A,A2);
-    fpcopy((digit_t*)&Montgomery_one,two[0]);
-    fpadd(two[0], two[0], two[0]);
-    fp2add(A,two,A24);
-    fp2div2(A24,A24);
-    fp2div2(A24,A24);
-
-    if ( (r-1) < TABLE_V3_LEN)
-        fp2mul_mont(A, (felm_t*)&v_3_torsion[r-1], x);  // x =  A*v;
-    else // If the index r is out of range, just use a default (0)
-        fp2mul_mont(A, (felm_t*)&v_3_torsion[0], x);  // x =  A*v0;
-    fp2neg(x);                                      // x = -A*v;
-    fp2add(x,A,y2);                                 // y2 = x + A             
-    fp2mul_mont(x,y2,y2);                           // y2 = x*(x + A)
-    fpadd(y2[0], one_fp2[0], y2[0]);                // y2 = x(x + A) + 1
-    fp2mul_mont(x, y2, y2);                         // y2 = x*(x^2 + A*x + 1);    
-    fpsqr_mont(y2[0],a2);
-    fpsqr_mont(y2[1],b2);
-    fpadd(a2,b2,N);                                 // N := Fp!norm(y2);
-
-    fpcopy(N,temp0);
-    for (i = 0; i < OALICE_BITS - 2; i++) {    
-        fpsqr_mont(temp0, temp0);
-    }
-    for (i = 0; i < OBOB_EXPON; i++) {
-        fpsqr_mont(temp0, temp1);
-        fpmul_mont(temp0, temp1, temp0);
+static bool SecondPoint_dual(const point_proj_t P, point_full_proj_t R, unsigned char ind)
+{
+    point_full_proj_t RS3;
+    f2elm_t gX, gZ;
+    felm_t zero = {0};
+    unsigned long long nbytes = NBITS_TO_NBYTES(NBITS_FIELD);
+
+    // Pair with 3-torsion point determined by first point
+    fpcopy((digit_t*)B_gen_3_tors + (4*ind + 0)*NWORDS_FIELD, (RS3->X)[0]);
+    fpcopy((digit_t*)B_gen_3_tors + (4*ind + 1)*NWORDS_FIELD, (RS3->X)[1]);
+    fpcopy((digit_t*)B_gen_3_tors + (4*ind + 2)*NWORDS_FIELD, (RS3->Y)[0]);
+    fpcopy((digit_t*)B_gen_3_tors + (4*ind + 3)*NWORDS_FIELD, (RS3->Y)[1]);
+
+    CompletePoint(P, R);
+    Tate3_proj(RS3, R, gX, gZ);
+    FinalExpo3(gX, gZ);
+
+    fp2correction(gX);
+    if (memcmp(gX[1], zero, (size_t)nbytes) != 0)    // Not equal to 1
+        return true;
+    else
+        return false;
+}
+
+
+static void FirstPoint3n(const f2elm_t a24, const f2elm_t As[][5], f2elm_t x, point_full_proj_t R, unsigned int *r, unsigned char *ind, unsigned char *bitEll)
+{
+    bool b = false;
+    point_proj_t P;
+    felm_t zero = {0};
+    *r = 0;
+
+    while (!b) {        
+        *bitEll = 0;
+        Elligator2(a24, *r, x, bitEll, COMPRESSION);    // Get x-coordinate on curve a24
+
+        fp2copy(x, P->X);
+        fpcopy((digit_t*)&Montgomery_one, (P->Z)[0]);
+        fpcopy(zero, (P->Z)[1]);
+        eval_full_dual_4_isog(As, P);    // Move x over to A = 0
+
+        b = FirstPoint_dual(P, R, ind);  // Compute DLog with 3-torsion points
+        *r = *r + 1;
     }
-    fpsqr_mont(temp0,temp1);  // z = N^((p + 1) div 4);
-    fpcorrection(temp1);
-    fpcorrection(N);
-    if (ct_compare((unsigned char*)temp1,(unsigned char*)N,NBITS_TO_NBYTES(NBITS_FIELD)) != 0) {
-        fp2neg(x);
-        fp2sub(x,A,x);        // x = -x - A;
+}
+
+
+static void SecondPoint3n(const f2elm_t a24, const f2elm_t As[][5], f2elm_t x, point_full_proj_t R, unsigned int *r, unsigned char ind, unsigned char *bitEll)
+{
+    bool b = false;
+    point_proj_t P;
+    felm_t zero = {0};
+
+    while (!b) {
+        *bitEll = 0;
+        Elligator2(a24, *r, x, bitEll, COMPRESSION);
+
+        fp2copy(x, P->X);
+        fpcopy((digit_t*)&Montgomery_one, (P->Z)[0]);
+        fpcopy(zero, (P->Z)[1]);
+        eval_full_dual_4_isog(As, P);    // Move x over to A = 0
+
+        b = SecondPoint_dual(P, R, ind);
+        *r = *r + 1;
     }
-    fp2copy(x,S->X);
-    fp2copy(one_fp2,S->Z);
-    Double(S,P,A24,OALICE_BITS);   // x, z := Double(A24, x, 1, eA);
 }
 
 
-void BuildOrdinaryE3nBasis(f2elm_t A, point_full_proj_t R1, point_full_proj_t R2, unsigned int rs[2])
-{ // Generate a basis for the 3^eB torsion
-    unsigned int r = 0;
-    f2elm_t t2w1, t1w2;
-    point_proj_t P, Q, R, S;
+static void makeDiff(const point_full_proj_t R, point_full_proj_t S, const point_proj_t D)
+{
+    f2elm_t t0, t1, t2;
+    unsigned long long nbytes = NBITS_TO_NBYTES(NBITS_FIELD);
+
+    fp2sub(R->X, S->X, t0);
+    fp2sub(R->Y, S->Y, t1);
+    fp2sqr_mont(t0, t0);
+    fp2sqr_mont(t1, t1);
+    fp2add(R->X, S->X, t2);
+    fp2mul_mont(t0, t2, t2);
+    fp2sub(t1, t2, t1);
+    fp2mul_mont(D->Z, t1, t1);
+    fp2mul_mont(D->X, t0, t0);
+    fp2correction(t0);
+    fp2correction(t1);
+    if (memcmp(t0[0], t1[0], (size_t)nbytes) == 0 && memcmp(t0[1], t1[1], (size_t)nbytes) == 0)
+        fp2neg(S->Y);
+}
+
+
+static void BiQuad_affine(const f2elm_t a24, const f2elm_t x0, const f2elm_t x1, point_proj_t R)
+{
+    f2elm_t Ap2, aa, bb, cc, t0, t1;
+
+    fp2add(a24, a24, Ap2);
+    fp2add(Ap2, Ap2, Ap2);    // Ap2 = a+2 = 4*a24
+
+    fp2sub(x0, x1, aa);
+    fp2sqr_mont(aa, aa);
+
+    fp2mul_mont(x0, x1, cc);
+    fpsub(cc[0], (digit_t*)Montgomery_one, cc[0]);
+    fp2sqr_mont(cc, cc);
+
+    fpsub(x0[0], (digit_t*)Montgomery_one, bb[0]);
+    fpcopy(x0[1], bb[1]);
+    fp2sqr_mont(bb, bb);
+    fp2mul_mont(Ap2, x0, t0);
+    fp2add(bb, t0, bb);
+    fp2mul_mont(x1, bb, bb);
+    fpsub(x1[0], (digit_t*)Montgomery_one, t0[0]);
+    fpcopy(x1[1], t0[1]);
+    fp2sqr_mont(t0, t0);
+    fp2mul_mont(Ap2, x1, t1);
+    fp2add(t0, t1, t0);
+    fp2mul_mont(x0, t0, t0);
+    fp2add(bb, t0, bb);
+    fp2add(bb, bb, bb);
+
+    fp2sqr_mont(bb, t0);
+    fp2mul_mont(aa, cc, t1);
+    fp2add(t1, t1, t1);
+    fp2add(t1, t1, t1);
+    fp2sub(t0, t1, t0);
+    sqrt_Fp2(t0, t0);
+    make_positive(t0);    // Make the sqrt "positive"
+    fp2add(bb, t0, R->X);
+    fp2add(aa, aa, R->Z);
+}
+
+
+static void BuildOrdinary3nBasis_dual(const f2elm_t a24, const f2elm_t As[][5], point_full_proj_t *R, unsigned int *r, unsigned int *bitsEll)
+{
+    point_proj_t D;
+    f2elm_t xs[2];
+    unsigned char ind, bit;
+
+    FirstPoint3n(a24, As, xs[0], R[0], r, &ind, &bit);
+    *bitsEll = (unsigned int)bit;
+    *(r+1) = *r;
+    SecondPoint3n(a24, As, xs[1], R[1], r+1, ind, &bit);
+    *bitsEll |= ((unsigned int)bit << 1);
+
+    // Get x-coordinate of difference
+    BiQuad_affine(a24, xs[0], xs[1], D);
+    eval_full_dual_4_isog(As, D);    // Move x over to A = 0
+    makeDiff(R[0], R[1], D);
+}
+
+
+static void BuildOrdinary3nBasis_Decomp_dual(const f2elm_t A24, point_proj_t *Rs, unsigned char *r, const unsigned char bitsEll)
+{
+    unsigned char bitEll[2];
     
-    // 1st basis point:
-    BasePoint3n(A,&r,P,Q);    
-    rs[0] = r;
+    bitEll[0] = bitsEll & 0x1;
+    bitEll[1] = (bitsEll >> 1) & 0x1;    
     
-    // 2nd basis point:
+    // Elligator2 both x-coordinates
+    Elligator2(A24, (unsigned int)r[0]-1, Rs[0]->X, &bitEll[0], DECOMPRESSION);
+    Elligator2(A24, (unsigned int)r[1]-1, Rs[1]->X, &bitEll[1], DECOMPRESSION);
+    // Get x-coordinate of difference
+    BiQuad_affine(A24, Rs[0]->X, Rs[1]->X, Rs[2]);
+}
+
+
+
+static void BuildEntangledXonly(const f2elm_t A, point_proj_t *R, unsigned char *qnr, unsigned char *ind)
+{
+    felm_t s;
+    f2elm_t *t_ptr, r, t;
+
+    // Select the correct table
+    if (is_sqr_fp2(A,  s)) {
+        t_ptr = (f2elm_t *)table_v_qnr; 
+        *qnr = 1;
+    } else {
+        t_ptr = (f2elm_t *)table_v_qr;
+        *qnr = 0;
+    }
+
+    // Get x0
+    *ind = 0;
     do {
-        BasePoint3n(A,&r,R,S);
-        fp2mul_mont(S->X,Q->Z,t2w1);
-        fp2mul_mont(Q->X,S->Z,t1w2);
-        fp2correction(t2w1);
-        fp2correction(t1w2);
-    } while (ct_compare((unsigned char*)t2w1[0],(unsigned char*)t1w2[0],NBITS_TO_NBYTES(NBITS_FIELD)) == 0 && ct_compare((unsigned char*)t2w1[1],(unsigned char*)t1w2[1],NBITS_TO_NBYTES(NBITS_FIELD)) == 0); // Pr[t2/w2 == t1/w1] = 1/4: E[loop length] = 4/3
-    rs[1] = r;
-
-    // NB: ideally the following point completions could share one inversion at the cost of 3 products, but this is not implemented here.
-    CompleteMPoint(A,P,R1);    
-    CompleteMPoint(A,R,R2);      
+        fp2mul_mont(A,  (felm_t *)*t_ptr++,  R[0]->X);    // R[0]->X =  A*v
+        fp2neg(R[0]->X);                                  // R[0]->X = -A*v
+        fp2add(R[0]->X,  A,  t);
+        fp2mul_mont(R[0]->X,  t,  t);
+        fpadd(t[0],  (digit_t*)Montgomery_one,  t[0]);
+        fp2mul_mont(R[0]->X,  t,  t);                     // t = R[0]->X^3 + A*R[0]->X^2 + R[0]->X
+        *ind += 1;
+    } while (!is_sqr_fp2(t,  s));
+    *ind -= 1;
+
+    if (*qnr)
+        fpcopy((digit_t*)table_r_qnr[*ind], r[0]);
+    else
+        fpcopy((digit_t*)table_r_qr[*ind], r[0]);
+
+    // Get x1
+    fp2add(R[0]->X, A, R[1]->X);
+    fp2neg(R[1]->X);    // R[1]->X = -R[0]->X-A
+
+    // Get difference x2,  z2
+    fp2sub(R[0]->X, R[1]->X, R[2]->Z);
+    fp2sqr_mont(R[2]->Z, R[2]->Z);
+
+    fpcopy(r[0], r[1]);    // (1+i)*ind
+    fpadd((digit_t*)Montgomery_one, r[0], r[0]);
+    fp2sqr_mont(r, r);
+    fp2mul_mont(t, r, R[2]->X);
 }
 
 
-void BuildOrdinaryE3nBasis_decompression(f2elm_t A, point_full_proj_t R1, point_full_proj_t R2, const unsigned char r1, const unsigned char r2)
-{ // Generate a basis for the 3^eB torsion using shared information
-    point_proj_t P, R;
-    
-    // 1st basis point:
-    BasePoint3n_decompression(A,r1,P);
-    // 2nd basis point:
-    BasePoint3n_decompression(A,r2,R);
+
+static void BuildOrdinary2nBasis_dual(const f2elm_t A, const f2elm_t Ds[][2], point_full_proj_t *Rs, unsigned char *qnr, unsigned char *ind)
+{
+    unsigned int i;
+    felm_t t0;
+    f2elm_t A6 = {0};
+    point_proj_t xs[3] = {0};
+
+    // Generate x-only entangled basis 
+    BuildEntangledXonly(A, xs, qnr, ind);
+    fpcopy((digit_t*)Montgomery_one, (xs[0]->Z)[0]);
+    fpcopy((digit_t*)Montgomery_one, (xs[1]->Z)[0]);
+
+    // Move them back to A = 6 
+    for(i = 0; i < MAX_Bob; i++) {
+        eval_3_isog(xs[0], Ds[MAX_Bob-1-i]);
+        eval_3_isog(xs[1], Ds[MAX_Bob-1-i]);
+        eval_3_isog(xs[2], Ds[MAX_Bob-1-i]);
+    }
+
+    // Recover y-coordinates with a single sqrt on A = 6
+    fpcopy((digit_t*)Montgomery_one, A6[0]);
+    fpadd(A6[0], A6[0], t0);
+    fpadd(t0, t0, A6[0]);
+    fpadd(A6[0], t0, A6[0]);
+
+    CompleteMPoint(A6, xs[0], Rs[0]);
+    RecoverY(A6, xs, Rs);
+}
+
+
+
+static void BuildEntangledXonly_Decomp(const f2elm_t A, point_proj_t *R, unsigned char qnr, unsigned char ind)
+{
+    f2elm_t *t_ptr, r, t;
     
-    // NB: ideally the following point completions could share one inversion at the cost of 3 products, but this is not implemented here.
-    CompleteMPoint(A,P,R1);    
-    CompleteMPoint(A,R,R2);   
+    // Select the correct table
+    if ( qnr == 1 )
+        t_ptr = (f2elm_t *)table_v_qnr; 
+    else
+        t_ptr = (f2elm_t *)table_v_qr;
+
+    if (ind >= TABLE_V_LEN/2)
+        ind = 0;
+    // Get x0     
+    fp2mul_mont(A, t_ptr[ind], R[0]->X);    // x1 =  A*v
+    fp2neg(R[0]->X);                        // R[0]->X = -A*v
+    fp2add(R[0]->X, A, t);
+    fp2mul_mont(R[0]->X, t, t);
+    fpadd(t[0], (digit_t*)Montgomery_one, t[0]);
+    fp2mul_mont(R[0]->X, t, t);             // t = R[0]->X^3 + A*R[0]->X^2 + R[0]->X
+
+    if (qnr == 1)
+        fpcopy((digit_t*)table_r_qnr[ind], r[0]);
+    else
+        fpcopy((digit_t*)table_r_qr[ind], r[0]);
+
+    // Get x1 
+    fp2add(R[0]->X, A, R[1]->X);
+    fp2neg(R[1]->X);    // R[1]->X = -R[0]->X-A
+
+    // Get difference x2,z2 
+    fp2sub(R[0]->X, R[1]->X, R[2]->Z);
+    fp2sqr_mont(R[2]->Z, R[2]->Z);
+
+    fpcopy(r[0],r[1]); // (1+i)*ind 
+    fpadd((digit_t*)Montgomery_one, r[0], r[0]);
+    fp2sqr_mont(r, r);
+    fp2mul_mont(t, r, R[2]->X);
 }
- 
diff --git a/src/ec_isogeny.c b/src/ec_isogeny.c
index 5b9607e..42ecd66 100644
--- a/src/ec_isogeny.c
+++ b/src/ec_isogeny.c
@@ -267,33 +267,36 @@ void j_inv(const f2elm_t A, const f2elm_t C, f2elm_t jinv)
 }
 
 
-void xDBLADD(point_proj_t P, point_proj_t Q, const f2elm_t xPQ, const f2elm_t A24)
+void xDBLADD(point_proj_t P, point_proj_t Q, const f2elm_t XPQ, const f2elm_t ZPQ, const f2elm_t A24)
 { // Simultaneous doubling and differential addition.
   // Input: projective Montgomery points P=(XP:ZP) and Q=(XQ:ZQ) such that xP=XP/ZP and xQ=XQ/ZQ, affine difference xPQ=x(P-Q) and Montgomery curve constant A24=(A+2)/4.
   // Output: projective Montgomery points P <- 2*P = (X2P:Z2P) such that x(2P)=X2P/Z2P, and Q <- P+Q = (XQP:ZQP) such that = x(Q+P)=XQP/ZQP. 
     f2elm_t t0, t1, t2;
 
-    mp2_add(P->X, P->Z, t0);                        // t0 = XP+ZP
-    mp2_sub_p2(P->X, P->Z, t1);                     // t1 = XP-ZP
-    fp2sqr_mont(t0, P->X);                          // XP = (XP+ZP)^2
-    mp2_sub_p2(Q->X, Q->Z, t2);                     // t2 = XQ-ZQ
-    mp2_add(Q->X, Q->Z, Q->X);                      // XQ = XQ+ZQ
-    fp2mul_mont(t0, t2, t0);                        // t0 = (XP+ZP)*(XQ-ZQ)
-    fp2sqr_mont(t1, P->Z);                          // ZP = (XP-ZP)^2
-    fp2mul_mont(t1, Q->X, t1);                      // t1 = (XP-ZP)*(XQ+ZQ)
-    mp2_sub_p2(P->X, P->Z, t2);                     // t2 = (XP+ZP)^2-(XP-ZP)^2
-    fp2mul_mont(P->X, P->Z, P->X);                  // XP = (XP+ZP)^2*(XP-ZP)^2
-    fp2mul_mont(A24, t2, Q->X);                     // XQ = A24*[(XP+ZP)^2-(XP-ZP)^2]
-    mp2_sub_p2(t0, t1, Q->Z);                       // ZQ = (XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)
-    mp2_add(Q->X, P->Z, P->Z);                      // ZP = A24*[(XP+ZP)^2-(XP-ZP)^2]+(XP-ZP)^2
-    mp2_add(t0, t1, Q->X);                          // XQ = (XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ)
-    fp2mul_mont(P->Z, t2, P->Z);                    // ZP = [A24*[(XP+ZP)^2-(XP-ZP)^2]+(XP-ZP)^2]*[(XP+ZP)^2-(XP-ZP)^2]
-    fp2sqr_mont(Q->Z, Q->Z);                        // ZQ = [(XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)]^2
-    fp2sqr_mont(Q->X, Q->X);                        // XQ = [(XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ)]^2
-    fp2mul_mont(Q->Z, xPQ, Q->Z);                   // ZQ = xPQ*[(XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)]^2
+    fp2add(P->X, P->Z, t0);                         // t0 = XP+ZP
+    fp2sub(P->X, P->Z, t1);                         // t1 = XP-ZP    
+    fp2sqr_mont(t0, P->X);                          // XP = (XP+ZP)^2    
+    fp2sub(Q->X, Q->Z, t2);                         // t2 = XQ-ZQ
+    fp2correction(t2);    
+    fp2add(Q->X, Q->Z, Q->X);                       // XQ = XQ+ZQ    
+    fp2mul_mont(t0, t2, t0);                        // t0 = (XP+ZP)*(XQ-ZQ)    
+    fp2sqr_mont(t1, P->Z);                          // ZP = (XP-ZP)^2    
+    fp2mul_mont(t1, Q->X, t1);                      // t1 = (XP-ZP)*(XQ+ZQ)    
+    fp2sub(P->X, P->Z, t2);                         // t2 = (XP+ZP)^2-(XP-ZP)^2    
+    fp2mul_mont(P->X, P->Z, P->X);                  // XP = (XP+ZP)^2*(XP-ZP)^2    
+    fp2mul_mont(t2, A24, Q->X);                     // XQ = A24*[(XP+ZP)^2-(XP-ZP)^2]    
+    fp2sub(t0, t1, Q->Z);                           // ZQ = (XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)    
+    fp2add(Q->X, P->Z, P->Z);                       // ZP = A24*[(XP+ZP)^2-(XP-ZP)^2]+(XP-ZP)^2    
+    fp2add(t0, t1, Q->X);                           // XQ = (XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ)    
+    fp2mul_mont(P->Z, t2, P->Z);                    // ZP = [A24*[(XP+ZP)^2-(XP-ZP)^2]+(XP-ZP)^2]*[(XP+ZP)^2-(XP-ZP)^2]    
+    fp2sqr_mont(Q->Z, Q->Z);                        // ZQ = [(XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)]^2    
+    fp2sqr_mont(Q->X, Q->X);                        // XQ = [(XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ)]^2    
+    fp2mul_mont(Q->X, ZPQ, Q->X);                   // XQ = ZPQ*[(XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ)]^2    
+    fp2mul_mont(Q->Z, XPQ, Q->Z);                   // ZQ = XPQ*[(XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)]^2          
 }
 
 
+
 static void swap_points(point_proj_t P, point_proj_t Q, const digit_t option)
 { // Swap points.
   // If option = 0 then P <- P and Q <- Q, else if option = 0xFF...FF then P <- Q and Q <- P
@@ -354,8 +357,7 @@ static void LADDER3PT(const f2elm_t xP, const f2elm_t xQ, const f2elm_t xPQ, con
         mask = 0 - (digit_t)swap;
 
         swap_points(R, R2, mask);
-        xDBLADD(R0, R2, R->X, A24);
-        fp2mul_mont(R2->X, R->Z, R2->X);
+        xDBLADD(R0, R2, R->X, R->Z, A24);
     }
     swap = 0 ^ prevbit;
     mask = 0 - (digit_t)swap;
@@ -364,6 +366,44 @@ static void LADDER3PT(const f2elm_t xP, const f2elm_t xQ, const f2elm_t xPQ, con
 
 #ifdef COMPRESS
 
+
+static void RecoverY(const f2elm_t A, const point_proj_t *xs, point_full_proj_t *Rs)
+{
+    f2elm_t t0, t1, t2, t3, t4;
+
+    fp2mul_mont(xs[2]->X, xs[1]->Z, t0);
+    fp2mul_mont(xs[1]->X, xs[2]->Z, t1);
+    fp2mul_mont(xs[1]->X, xs[2]->X, t2);
+    fp2mul_mont(xs[1]->Z, xs[2]->Z, t3);
+    fp2sqr_mont(xs[1]->X, t4);
+    fp2sqr_mont(xs[1]->Z, Rs[1]->X);
+    fp2sub(t2, t3, Rs[1]->Y);
+    fp2mul_mont(xs[1]->X, Rs[1]->Y, Rs[1]->Y);
+    fp2add(t4, Rs[1]->X, t4);
+    fp2mul_mont(xs[2]->Z, t4, t4);
+    fp2mul_mont(A, t1, Rs[1]->X);
+    fp2sub(t0, t1, Rs[1]->Z);
+
+    fp2mul_mont(Rs[0]->X, Rs[1]->Z, t0);
+    fp2add(t2, Rs[1]->X, t1);
+    fp2add(t1, t1, t1);
+    fp2sub(t0, t1, t0);
+    fp2mul_mont(xs[1]->Z, t0, t0);
+    fp2sub(t0, t4, t0);
+    fp2mul_mont(Rs[0]->X, t0, t0);
+    fp2add(t0, Rs[1]->Y, Rs[1]->Y);
+    fp2mul_mont(Rs[0]->Y, t3, t0);
+    fp2mul_mont(xs[1]->X, t0, Rs[1]->X);
+    fp2add(Rs[1]->X, Rs[1]->X, Rs[1]->X);
+    fp2mul_mont(xs[1]->Z, t0, Rs[1]->Z);
+    fp2add(Rs[1]->Z, Rs[1]->Z, Rs[1]->Z);
+
+    fp2inv_mont_bingcd(Rs[1]->Z);
+    fp2mul_mont(Rs[1]->X, Rs[1]->Z, Rs[1]->X);
+    fp2mul_mont(Rs[1]->Y, Rs[1]->Z, Rs[1]->Y);
+}
+
+
 static void CompletePoint(const point_proj_t P, point_full_proj_t R)
 { // Complete point on A = 0 curve
     f2elm_t xz, s2, r2, yz, invz, t0, t1, one = {0};
@@ -391,7 +431,7 @@ void CompleteMPoint(const f2elm_t A, point_proj_t P, point_full_proj_t R)
     f2elm_t zero = {0}, one = {0}, xz, yz, s2, r2, invz, temp0, temp1;
 
     fpcopy((digit_t*)&Montgomery_one, one[0]);    
-    if (ct_compare((unsigned char*)P->Z[0], (unsigned char*)zero, NBITS_TO_NBYTES(NBITS_FIELD)) != 0 || ct_compare((unsigned char*)P->Z[1], (unsigned char*)zero, NBITS_TO_NBYTES(NBITS_FIELD)) != 0) {
+    if (memcmp(P->Z[0], zero, NBITS_TO_NBYTES(NBITS_FIELD)) != 0 || memcmp(P->Z[1], zero, NBITS_TO_NBYTES(NBITS_FIELD)) != 0) {
         fp2mul_mont(P->X, P->Z, xz);       // xz = x*z;
         fpsub(P->X[0], P->Z[1], temp0[0]);
         fpadd(P->X[1], P->Z[0], temp0[1]);
@@ -482,136 +522,6 @@ void xTPLe_fast(point_proj_t P, point_proj_t Q, const f2elm_t A2, int e)
 }
 
 
-void ADD(const point_full_proj_t P, const f2elm_t QX, const f2elm_t QY, const f2elm_t QZ, const f2elm_t A, point_full_proj_t R)
-{ // General addition.
-  // Input: projective Montgomery points P=(XP:YP:ZP) and Q=(XQ:YQ:ZQ).
-  // Output: projective Montgomery point R <- P+Q = (XQP:YQP:ZQP). 
-    f2elm_t t0 = {0}, t1 = {0}, t2 = {0}, t3 = {0}, t4 = {0}, t5 = {0}, t6 = {0}, t7 = {0};
-
-    fp2mul_mont(QX, P->Z, t0);            // t0 = x2*Z1    
-    fp2mul_mont(P->X, QZ, t1);            // t1 = X1*z2    
-    fp2add(t0, t1, t2);                   // t2 = t0 + t1
-    fp2sub(t1, t0, t3);                   // t3 = t1 - t0
-    fp2mul_mont(QX, P->X, t0);            // t0 = x2*X1    
-    fp2mul_mont(P->Z, QZ, t1);            // t1 = Z1*z2
-    fp2add(t0, t1, t4);                   // t4 = t0 + t1
-    fp2mul_mont(t0, A, t0);               // t0 = t0*A
-    fp2mul_mont(QY, P->Y, t5);            // t5 = y2*Y1
-    fp2sub(t0, t5, t0);                   // t0 = t0 - t5
-    fp2mul_mont(t0, t1, t0);              // t0 = t0*t1
-    fp2add(t0, t0, t0);                   // t0 = t0 + t0
-    fp2mul_mont(t2, t4, t5);              // t5 = t2*t4
-    fp2add(t5, t0, t5);                   // t5 = t5 + t0
-    fp2sqr_mont(P->X, t0);                // t0 = X1 ^ 2
-    fp2sqr_mont(P->Z, t6);                // t6 = Z1 ^ 2
-    fp2add(t0, t6, t0);                   // t0 = t0 + t6    
-    fp2add(t1, t1, t1);                   // t1 = t1 + t1
-    fp2mul_mont(QY, P->X, t7);            // t7 = y2*X1
-    fp2mul_mont(QX, P->Y, t6);            // t6 = x2*Y1
-    fp2sub(t7, t6, t7);                   // t7 = t7 - t6
-    fp2mul_mont(t1, t7, t1);              // t1 = t1*t7
-    fp2mul_mont(A, t2, t7);               // t7 = A*t2
-    fp2add(t7, t4, t4);                   // t4 = t4 + t7
-    fp2mul_mont(t1, t4, t4);              // t4 = t1*t4
-    fp2mul_mont(QY, QZ, t1);              // t1 = y2*z2
-    fp2mul_mont(t0, t1, t0);              // t0 = t0*t1
-    fp2sqr_mont(QZ, t1);                  // t1 = z2 ^ 2
-    fp2sqr_mont(QX, t6);                  // t6 = x2 ^ 2
-    fp2add(t1, t6, t1);                   // t1 = t1 + t6
-    fp2mul_mont(P->Z, P->Y, t6);          // t6 = Z1*Y1
-    fp2mul_mont(t1, t6, t1);              // t1 = t1*t6
-    fp2sub(t0, t1, t0);                   // t0 = t0 - t1
-    fp2mul_mont(t2, t0, t0);              // t0 = t2*t0
-    fp2mul_mont(t5, t3, R->X);            // X3 = t5*t3    
-    fp2add(t4, t0, R->Y);                 // Y3 = t4 + t0
-    fp2sqr_mont(t3, t0);                  // t0 = t3 ^ 2
-    fp2mul_mont(t3, t0, R->Z);            // Z3 = t3*t0
-}
-
-
-void Mont_ladder(const f2elm_t x, const digit_t* m, point_proj_t P, point_proj_t Q, const f2elm_t A24, const unsigned int order_bits, const unsigned int order_fullbits)
-{ // The Montgomery ladder
-  // Inputs: the affine x-coordinate of a point P on E: B*y^2=x^3+A*x^2+x, 
-  //         scalar m
-  //         curve constant A24 = (A+2)/4
-  //         order_bits = subgroup order bitlength
-  //         order_fullbits = smallest multiple of 32 larger than the order bitlength
-  // Output: P = m*(x:1)
-    unsigned int bit = 0, owords = NBITS_TO_NWORDS(order_fullbits);
-    digit_t mask, scalar[NWORDS_ORDER];
-    int i;
-    
-    // Initializing with the points (1:0) and (x:1)
-    fpcopy((digit_t*)&Montgomery_one, (digit_t*)P->X[0]);
-    fpzero(P->X[1]);
-    fp2zero(P->Z);
-    
-    fp2copy(x, Q->X);    
-    fpcopy((digit_t*)&Montgomery_one, (digit_t*)Q->Z[0]);    
-    fpzero(Q->Z[1]);
-
-    for (i = NWORDS_ORDER-1; i >= 0; i--) {
-        scalar[i] = m[i];
-    }
-    
-    for (i = order_fullbits-order_bits; i > 0; i--) {
-        mp_shiftl1(scalar, owords);
-    }    
-    
-    for (i = order_bits; i > 0; i--) {
-        bit = (unsigned int)(scalar[owords-1] >> (RADIX-1));
-        mp_shiftl1(scalar, owords);
-        mask = 0-(digit_t)bit;
-
-        swap_points(P, Q, mask);        
-        xDBLADD(P, Q, x, A24);                     // If bit=0 then P <- 2*P and Q <- P+Q, 
-        swap_points(P, Q, mask);                   // else if bit=1 then Q <- 2*Q and P <- P+Q
-    }
-}
-
-
-void mont_twodim_scalarmult(digit_t* a, const point_t R, const point_t S, const f2elm_t A, const f2elm_t A24, point_full_proj_t P, const unsigned int order_bits)
-{ // Computes P = R + [a]S  
-    point_proj_t P0 = {0}, P1 = {0};
-    point_full_proj_t P2 = {0};
-    f2elm_t one = {0};    
-
-    fpcopy((digit_t*)&Montgomery_one, one[0]);
-    Mont_ladder(S->x, a, P0, P1, A24, order_bits, MAXBITS_ORDER);    
-    recover_os(P0->X, P0->Z, P1->X, P1->Z, S->x, S->y, A, P2->X, P2->Y, P2->Z);     
-    ADD(P2, R->x, R->y, one, A, P);       
-}
-
-
-void xDBLADD_proj(point_proj_t P, point_proj_t Q, const f2elm_t XPQ, const f2elm_t ZPQ, const f2elm_t A24)
-{ // Simultaneous doubling and differential addition.
-  // Input: projective Montgomery points P=(XP:ZP) and Q=(XQ:ZQ) such that xP=XP/ZP and xQ=XQ/ZQ, affine difference xPQ=x(P-Q) and Montgomery curve constant A24=(A+2)/4.
-  // Output: projective Montgomery points P <- 2*P = (X2P:Z2P) such that x(2P)=X2P/Z2P, and Q <- P+Q = (XQP:ZQP) such that = x(Q+P)=XQP/ZQP. 
-    f2elm_t t0, t1, t2;
-
-    fp2add(P->X, P->Z, t0);                         // t0 = XP+ZP
-    fp2sub(P->X, P->Z, t1);                         // t1 = XP-ZP    
-    fp2sqr_mont(t0, P->X);                          // XP = (XP+ZP)^2    
-    fp2sub(Q->X, Q->Z, t2);                         // t2 = XQ-ZQ
-    fp2correction(t2);    
-    fp2add(Q->X, Q->Z, Q->X);                       // XQ = XQ+ZQ    
-    fp2mul_mont(t0, t2, t0);                        // t0 = (XP+ZP)*(XQ-ZQ)    
-    fp2sqr_mont(t1, P->Z);                          // ZP = (XP-ZP)^2    
-    fp2mul_mont(t1, Q->X, t1);                      // t1 = (XP-ZP)*(XQ+ZQ)    
-    fp2sub(P->X, P->Z, t2);                         // t2 = (XP+ZP)^2-(XP-ZP)^2    
-    fp2mul_mont(P->X, P->Z, P->X);                  // XP = (XP+ZP)^2*(XP-ZP)^2    
-    fp2mul_mont(t2, A24, Q->X);                     // XQ = A24*[(XP+ZP)^2-(XP-ZP)^2]    
-    fp2sub(t0, t1, Q->Z);                           // ZQ = (XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)    
-    fp2add(Q->X, P->Z, P->Z);                       // ZP = A24*[(XP+ZP)^2-(XP-ZP)^2]+(XP-ZP)^2    
-    fp2add(t0, t1, Q->X);                           // XQ = (XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ)    
-    fp2mul_mont(P->Z, t2, P->Z);                    // ZP = [A24*[(XP+ZP)^2-(XP-ZP)^2]+(XP-ZP)^2]*[(XP+ZP)^2-(XP-ZP)^2]    
-    fp2sqr_mont(Q->Z, Q->Z);                        // ZQ = [(XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)]^2    
-    fp2sqr_mont(Q->X, Q->X);                        // XQ = [(XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ)]^2    
-    fp2mul_mont(Q->X, ZPQ, Q->X);                   // XQ = ZPQ*[(XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ)]^2    
-    fp2mul_mont(Q->Z, XPQ, Q->Z);                   // ZQ = XPQ*[(XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)]^2          
-}
-
-
 void xDBL_e(const point_proj_t P, point_proj_t Q, const f2elm_t A24, const int e)
 { // Doubling of a Montgomery point in projective coordinates (X:Z) over affine curve coefficient A. 
   // Input: projective Montgomery x-coordinates P = (X1:Z1), where x1=X1/Z1 and Montgomery curve constants (A+2)/4.
@@ -635,13 +545,110 @@ void xDBL_e(const point_proj_t P, point_proj_t Q, const f2elm_t A24, const int e
 }
 
 
-void Ladder(const point_proj_t P, const digit_t* m, const f2elm_t A, const unsigned int order_bits, point_proj_t R) 
+static void eval_dual_4_isog_shared(const f2elm_t X4pZ4, const f2elm_t X42, const f2elm_t Z42, f2elm_t* coeff)
+{
+    fp2sub(X42, Z42, coeff[0]);
+    fp2add(X42, Z42, coeff[1]);
+    fp2sqr_mont(X4pZ4, coeff[2]);
+    fp2sub(coeff[2], coeff[1], coeff[2]);
+}
+
+
+static void eval_dual_4_isog(const f2elm_t A24, const f2elm_t C24, const f2elm_t* coeff, point_proj_t P)
+{
+    f2elm_t t0, t1, t2, t3;
+
+    fp2add(P->X, P->Z, t0);
+    fp2sub(P->X, P->Z, t1);
+    fp2sqr_mont(t0, t0);
+    fp2sqr_mont(t1, t1);
+    fp2sub(t0, t1, t2);
+    fp2sub(C24, A24, t3);
+    fp2mul_mont(t2, t3, t3);
+    fp2mul_mont(C24, t0, t2);
+    fp2sub(t2, t3, t2);
+    fp2mul_mont(t2, t0, P->X);
+    fp2mul_mont(t3, t1, P->Z);
+    fp2mul_mont(coeff[0], P->X, P->X);
+    fp2mul_mont(coeff[1], P->Z, t0);
+    fp2add(P->X, t0, P->X);
+    fp2mul_mont(coeff[2], P->Z, P->Z);
+}
+
+
+static void get_4_isog_dual(const point_proj_t P, f2elm_t A24, f2elm_t C24, f2elm_t* coeff)
+{
+    fp2sub(P->X, P->Z, coeff[1]);
+    fp2add(P->X, P->Z, coeff[2]);
+    fp2sqr_mont(P->Z, coeff[4]);
+    fp2add(coeff[4], coeff[4], coeff[0]);
+    fp2sqr_mont(coeff[0], C24);
+    fp2add(coeff[0], coeff[0], coeff[0]);
+    fp2sqr_mont(P->X, coeff[3]);
+    fp2add(coeff[3], coeff[3], A24);
+    fp2sqr_mont(A24, A24);
+}
+
+
+#if (OALICE_BITS % 2 == 1)
+
+static void eval_dual_2_isog(const f2elm_t X2, const f2elm_t Z2, point_proj_t P)
+{
+    f2elm_t t0;
+
+    fp2add(P->X, P->Z, t0);
+    fp2sub(P->X, P->Z, P->Z);
+    fp2sqr_mont(t0, t0);
+    fp2sqr_mont(P->Z, P->Z);
+    fp2sub(t0, P->Z, P->Z);
+    fp2mul_mont(X2, P->Z, P->Z);
+    fp2mul_mont(Z2, t0, P->X);
+}
+
+#endif
+
+
+static void eval_final_dual_2_isog(point_proj_t P)
+{
+    f2elm_t t0, t1;
+    felm_t t2;
+
+    fp2add(P->X, P->Z, t0);
+    fp2mul_mont(P->X, P->Z, t1);
+    fp2sqr_mont(t0, P->X);
+    fpcopy((P->X)[0], t2);
+    fpcopy((P->X)[1], (P->X)[0]);
+    fpcopy(t2, (P->X)[1]);
+    fpneg((P->X)[1]);
+    fp2add(t1, t1, P->Z);
+    fp2add(P->Z, P->Z, P->Z);
+}
+
+
+static void eval_full_dual_4_isog(const f2elm_t As[][5], point_proj_t P)
 {
+    // First all 4-isogenies
+    for(unsigned int i = 0; i < MAX_Alice; i++) {
+        eval_dual_4_isog(As[MAX_Alice-i][0], As[MAX_Alice-i][1], *(As+MAX_Alice-i-1)+2, P);
+    }
+#if (OALICE_BITS % 2 == 1)
+    eval_dual_2_isog(As[MAX_Alice][2], As[MAX_Alice][3], P);
+#endif
+    eval_final_dual_2_isog(P);    // to A = 0
+}
+
+
+void Ladder(const point_proj_t P, const digit_t* m, const f2elm_t A, const unsigned int order_bits, point_proj_t R) 
+{ // The Montgomery ladder
+  // Inputs: a projective point P=[X:Z] on E: B*y^2=x^3+A*x^2+x, 
+  //         scalar m
+  //         curve coefficient A
+  //         order_bits = subgroup order bitlength
+  // Output: R = m*(X:Z)
     point_proj_t R0, R1;
     f2elm_t A24 = {0};
-    unsigned int bit = 0;
     digit_t mask;
-    int j, swap, prevbit = 0;    
+    unsigned int bit = 0, prevbit = 0, j, swap;
         
     fpcopy((digit_t*)&Montgomery_one, A24[0]);
     fpadd(A24[0], A24[0], A24[0]);
@@ -665,7 +672,7 @@ void Ladder(const point_proj_t P, const digit_t* m, const f2elm_t A, const unsig
         mask = 0 - (digit_t)swap;
 
         swap_points(R0, R1, mask);
-        xDBLADD_proj(R0, R1, P->X, P->Z, A24);
+        xDBLADD(R0, R1, P->X, P->Z, A24);
     }
     swap = 0 ^ prevbit;
     mask = 0 - (digit_t)swap;
@@ -675,4 +682,39 @@ void Ladder(const point_proj_t P, const digit_t* m, const f2elm_t A, const unsig
     fp2copy(R0->Z, R->Z);
 }
 
+
+static void Ladder3pt_dual(const point_proj_t *Rs, const digit_t* m, const unsigned int AliceOrBob, point_proj_t R, const f2elm_t A24)
+{ // Project 3-point ladder
+    point_proj_t R0 = {0}, R2 = {0};
+    digit_t mask;
+    int i, nbits, bit, swap, prevbit = 0;
+
+    if (AliceOrBob == ALICE) {
+        nbits = OALICE_BITS;
+    } else {
+        nbits = OBOB_BITS;
+    }
+
+    fp2copy(Rs[1]->X, R0->X);
+    fp2copy(Rs[1]->Z, R0->Z);
+    fp2copy(Rs[2]->X, R2->X);
+    fp2copy(Rs[2]->Z, R2->Z);
+    fp2copy(Rs[0]->X, R->X);
+    fp2copy(Rs[0]->Z, R->Z);
+
+    // Main loop
+    for (i = 0; i < nbits; i++) {
+        bit = (m[i >> LOG2RADIX] >> (i & (RADIX-1))) & 1;
+        swap = bit ^ prevbit;
+        prevbit = bit;
+        mask = 0 - (digit_t)swap;
+
+        swap_points(R, R2, mask);
+        xDBLADD(R0, R2, R->X, R->Z, A24);
+    }
+    swap = 0 ^ prevbit;
+    mask = 0 - (digit_t)swap;
+    swap_points(R, R2, mask);
+}
+
 #endif
\ No newline at end of file
diff --git a/src/fpx.c b/src/fpx.c
index 722f40a..72629f8 100644
--- a/src/fpx.c
+++ b/src/fpx.c
@@ -969,7 +969,7 @@ unsigned char is_sqr_fp2(const f2elm_t a, felm_t s)
     fpsqr_mont(s,temp);          // s = z^((p+1)/4)
     fpcorrection(temp);
     fpcorrection(z);
-    if (ct_compare((unsigned char*)temp, (unsigned char*)z, NBITS_TO_NBYTES(NBITS_FIELD)) != 0)  // s^2 !=? z
+    if (memcmp(temp, z, NBITS_TO_NBYTES(NBITS_FIELD)) != 0)  // s^2 !=? z
         return 0;
     
     return 1;
@@ -1004,7 +1004,7 @@ void sqrt_Fp2(const f2elm_t u, f2elm_t y)
     fpcorrection(t0);
     fpcorrection(t3);
            
-    if (ct_compare((unsigned char*)t0, (unsigned char*)t3, NBITS_TO_NBYTES(NBITS_FIELD)) == 0) {
+    if (memcmp(t0, t3, NBITS_TO_NBYTES(NBITS_FIELD)) == 0) {
         fpcopy(t1, y[0]);
         fpcopy(t2, y[1]);
     } else {
diff --git a/src/internal.h b/src/internal.h
index 7d0ceec..924f246 100644
--- a/src/internal.h
+++ b/src/internal.h
@@ -70,7 +70,7 @@ void mont_n_way_inv(const f2elm_t* vec, const int n, f2elm_t* out);
 void j_inv(const f2elm_t A, const f2elm_t C, f2elm_t jinv);
 
 // Simultaneous doubling and differential addition.
-void xDBLADD(point_proj_t P, point_proj_t Q, const f2elm_t xPQ, const f2elm_t A24);
+void xDBLADD(point_proj_t P, point_proj_t Q, const f2elm_t XPQ, const f2elm_t ZPQ, const f2elm_t A24);
 
 // Doubling of a Montgomery point in projective coordinates (X:Z).
 void xDBL(const point_proj_t P, point_proj_t Q, const f2elm_t A24plus, const f2elm_t C24);