diff --git a/src/P434/ARM64/fp_arm64_asm.S b/src/P434/ARM64/fp_arm64_asm.S index f86f817..ad4ddf3 100644 --- a/src/P434/ARM64/fp_arm64_asm.S +++ b/src/P434/ARM64/fp_arm64_asm.S @@ -4,7 +4,15 @@ // Abstract: field arithmetic in 64-bit ARMv8 assembly for P434 on Linux //******************************************************************************************* +// Format function and variable names for Mac OS X +#if defined(__APPLE__) + #define fmt(f) _##f +#else + #define fmt(f) f +#endif + .text +.align 2 // p434 p434: @@ -43,8 +51,8 @@ p434p1_nz: // Field addition // Operation: c [x2] = a [x0] + b [x1] //*********************************************************************** -.global fpadd434_asm -fpadd434_asm: +.global fmt(fpadd434_asm) +fmt(fpadd434_asm): // Add a + b ldp x3, x4, [x0,#0] @@ -105,8 +113,8 @@ fpadd434_asm: // Field subtraction // Operation: c [x2] = a [x0] - b [x1] //*********************************************************************** -.global fpsub434_asm -fpsub434_asm: +.global fmt(fpsub434_asm) +fmt(fpsub434_asm): // Subtract a - b ldp x3, x4, [x0,#0] @@ -198,8 +206,8 @@ fpsub434_asm: // Multiprecision subtraction with correction with 2*p434 // Operation: c [reg_p3] = a [reg_p1] - b [reg_p2] + 2*p434 //*********************************************************************** -.global mp_sub434_p2_asm -mp_sub434_p2_asm: +.global fmt(mp_sub434_p2_asm) +fmt(mp_sub434_p2_asm): SUB434_PX p434x2 ret @@ -209,8 +217,8 @@ mp_sub434_p2_asm: // Multiprecision subtraction with correction with 4*p434 // Operation: c [reg_p3] = a [reg_p1] - b [reg_p2] + 4*p434 //*********************************************************************** -.global mp_sub434_p4_asm -mp_sub434_p4_asm: +.global fmt(mp_sub434_p4_asm) +fmt(mp_sub434_p4_asm): SUB434_PX p434x4 ret @@ -361,8 +369,8 @@ mp_sub434_p4_asm: // 448-bit integer multiplication using Karatsuba (two levels), Comba (lower level) // Operation: c [x2] = a [x0] * b [x1] //*********************************************************************************** -.global mul434_asm -mul434_asm: +.global fmt(mul434_asm) +fmt(mul434_asm): sub sp, sp, #96 ldp x3, x4, [x0] ldp x7, x8, [x0,#32] @@ -552,8 +560,8 @@ mul434_asm: // Operation: mc [x1] = ma [x0] // NOTE: ma=mc is not allowed //************************************************************************************** -.global rdc434_asm -rdc434_asm: +.global fmt(rdc434_asm) +fmt(rdc434_asm): sub sp, sp, #80 stp x21, x24, [sp,#16] stp x25, x26, [sp,#32] @@ -653,8 +661,8 @@ rdc434_asm: // 434-bit multiprecision addition // Operation: c [x2] = a [x0] + b [x1] //*********************************************************************** -.global mp_add434_asm -mp_add434_asm: +.global fmt(mp_add434_asm) +fmt(mp_add434_asm): ldp x3, x4, [x0,#0] ldp x11, x12, [x1,#0] ldp x5, x6, [x0,#16] @@ -682,8 +690,8 @@ mp_add434_asm: // 2x434-bit multiprecision addition // Operation: c [x2] = a [x0] + b [x1] //*********************************************************************** -.global mp_add434x2_asm -mp_add434x2_asm: +.global fmt(mp_add434x2_asm) +fmt(mp_add434x2_asm): ldp x3, x4, [x0,#0] ldp x11, x12, [x1,#0] ldp x5, x6, [x0,#16] @@ -729,8 +737,8 @@ mp_add434x2_asm: // 2x434-bit multiprecision subtraction/addition // Operation: c [x2] = a [x0] - b [x1]. If c < 0, add p434*2^448 //*********************************************************************** -.global mp_subadd434x2_asm -mp_subadd434x2_asm: +.global fmt(mp_subadd434x2_asm) +fmt(mp_subadd434x2_asm): ldp x3, x4, [x0,#0] ldp x11, x12, [x1,#0] ldp x5, x6, [x0,#16] @@ -797,8 +805,8 @@ mp_subadd434x2_asm: // Double 2x434-bit multiprecision subtraction // Operation: c [x2] = c [x2] - a [x0] - b [x1] //*********************************************************************** -.global mp_dblsub434x2_asm -mp_dblsub434x2_asm: +.global fmt(mp_dblsub434x2_asm) +fmt(mp_dblsub434x2_asm): sub sp, sp, #32 stp x27, x28, [sp, #0] stp x29, x30, [sp, #16] @@ -863,4 +871,4 @@ mp_dblsub434x2_asm: stp x15, x16, [x2,#96] ldp x29, x30, [sp, #16] add sp, sp, #32 - ret \ No newline at end of file + ret diff --git a/src/P503/ARM64/fp_arm64_asm.S b/src/P503/ARM64/fp_arm64_asm.S index 05c869f..914d789 100644 --- a/src/P503/ARM64/fp_arm64_asm.S +++ b/src/P503/ARM64/fp_arm64_asm.S @@ -4,7 +4,15 @@ // Abstract: field arithmetic in 64-bit ARMv8 assembly for P503 on Linux //******************************************************************************************* +// Format function and variable names for Mac OS X +#if defined(__APPLE__) + #define fmt(f) _##f +#else + #define fmt(f) f +#endif + .text +.align 2 // p503 p503: @@ -46,8 +54,8 @@ p503p1_nz_s8: // Field addition // Operation: c [x2] = a [x0] + b [x1] //*********************************************************************** -.global fpadd503_asm -fpadd503_asm: +.global fmt(fpadd503_asm) +fmt(fpadd503_asm): ldp x3, x4, [x0,#0] ldp x11, x12, [x1,#0] ldp x5, x6, [x0,#16] @@ -113,8 +121,8 @@ fpadd503_asm: // Field subtraction // Operation: c [x2] = a [x0] - b [x1] //*********************************************************************** -.global fpsub503_asm -fpsub503_asm: +.global fmt(fpsub503_asm) +fmt(fpsub503_asm): ldp x3, x4, [x0,#0] ldp x11, x12, [x1,#0] ldp x5, x6, [x0,#16] @@ -213,8 +221,8 @@ fpsub503_asm: // Multiprecision subtraction with correction with 2*p503 // Operation: c [reg_p3] = a [reg_p1] - b [reg_p2] + 2*p503 //*********************************************************************** -.global mp_sub503_p2_asm -mp_sub503_p2_asm: +.global fmt(mp_sub503_p2_asm) +fmt(mp_sub503_p2_asm): SUB503_PX p503x2 ret @@ -224,8 +232,8 @@ mp_sub503_p2_asm: // Multiprecision subtraction with correction with 4*p503 // Operation: c [reg_p3] = a [reg_p1] - b [reg_p2] + 4*p503 //*********************************************************************** -.global mp_sub503_p4_asm -mp_sub503_p4_asm: +.global fmt(mp_sub503_p4_asm) +fmt(mp_sub503_p4_asm): SUB503_PX p503x4 ret @@ -332,8 +340,8 @@ mp_sub503_p4_asm: // 512-bit integer multiplication using Karatsuba (two levels), Comba (lower level) // Operation: c [x2] = a [x0] * b [x1] //*********************************************************************************** -.global mul503_asm -mul503_asm: +.global fmt(mul503_asm) +fmt(mul503_asm): sub sp, sp, #96 ldp x3, x4, [x0] ldp x5, x6, [x0,#16] @@ -508,8 +516,8 @@ mul503_asm: // Operation: mc [x1] = ma [x0] // NOTE: ma=mc is not allowed //************************************************************************************** -.global rdc503_asm -rdc503_asm: +.global fmt(rdc503_asm) +fmt(rdc503_asm): sub sp, sp, #96 stp x23, x24, [sp, #32] stp x25, x26, [sp, #48] @@ -673,8 +681,8 @@ rdc503_asm: // 503-bit multiprecision addition // Operation: c [x2] = a [x0] + b [x1] //*********************************************************************** -.global mp_add503_asm -mp_add503_asm: +.global fmt(mp_add503_asm) +fmt(mp_add503_asm): ldp x3, x4, [x0,#0] ldp x11, x12, [x1,#0] ldp x5, x6, [x0,#16] @@ -703,8 +711,8 @@ mp_add503_asm: // 2x503-bit multiprecision addition // Operation: c [x2] = a [x0] + b [x1] //*********************************************************************** -.global mp_add503x2_asm -mp_add503x2_asm: +.global fmt(mp_add503x2_asm) +fmt(mp_add503x2_asm): ldp x3, x4, [x0,#0] ldp x11, x12, [x1,#0] ldp x5, x6, [x0,#16] @@ -755,8 +763,8 @@ mp_add503x2_asm: // 2x503-bit multiprecision subtraction/addition // Operation: c [x2] = a [x0] - b [x1]. If c < 0, add p503*2^512 //*********************************************************************** -.global mp_subadd503x2_asm -mp_subadd503x2_asm: +.global fmt(mp_subadd503x2_asm) +fmt(mp_subadd503x2_asm): ldp x3, x4, [x0,#0] ldp x11, x12, [x1,#0] ldp x5, x6, [x0,#16] @@ -828,8 +836,8 @@ mp_subadd503x2_asm: // Double 2x503-bit multiprecision subtraction // Operation: c [x2] = c [x2] - a [x0] - b [x1] //*********************************************************************** -.global mp_dblsub503x2_asm -mp_dblsub503x2_asm: +.global fmt(mp_dblsub503x2_asm) +fmt(mp_dblsub503x2_asm): sub sp, sp, #32 stp x27, x28, [sp, #0] stp x29, x30, [sp, #16] diff --git a/src/P610/ARM64/fp_arm64_asm.S b/src/P610/ARM64/fp_arm64_asm.S index 365af21..b1ecf43 100644 --- a/src/P610/ARM64/fp_arm64_asm.S +++ b/src/P610/ARM64/fp_arm64_asm.S @@ -4,7 +4,15 @@ // Abstract: field arithmetic in 64-bit ARMv8 assembly for P610 on Linux //******************************************************************************************* +// Format function and variable names for Mac OS X +#if defined(__APPLE__) + #define fmt(f) _##f +#else + #define fmt(f) f +#endif + .text +.align 2 // p610 p610: @@ -51,8 +59,8 @@ p610p1_nz_s6: // Field addition // Operation: c [x2] = a [x0] + b [x1] //*********************************************************************** -.global fpadd610_asm -fpadd610_asm: +.global fmt(fpadd610_asm) +fmt(fpadd610_asm): // Add a + b ldp x3, x4, [x0,#0] ldp x5, x6, [x0,#16] @@ -131,8 +139,8 @@ fpadd610_asm: // Field subtraction // Operation: c [x2] = a [x0] - b [x1] //*********************************************************************** -.global fpsub610_asm -fpsub610_asm: +.global fmt(fpsub610_asm) +fmt(fpsub610_asm): // Subtract a - b ldp x3, x4, [x0,#0] ldp x5, x6, [x0,#16] @@ -248,8 +256,8 @@ fpsub610_asm: // Multiprecision subtraction with correction with 2*p610 // Operation: c [reg_p3] = a [reg_p1] - b [reg_p2] + 2*p610 //*********************************************************************** -.global mp_sub610_p2_asm -mp_sub610_p2_asm: +.global fmt(mp_sub610_p2_asm) +fmt(mp_sub610_p2_asm): SUB610_PX p610x2 ret @@ -259,8 +267,8 @@ mp_sub610_p2_asm: // Multiprecision subtraction with correction with 4*p610 // Operation: c [reg_p3] = a [reg_p1] - b [reg_p2] + 4*p610 //*********************************************************************** -.global mp_sub610_p4_asm -mp_sub610_p4_asm: +.global fmt(mp_sub610_p4_asm) +fmt(mp_sub610_p4_asm): SUB610_PX p610x4 ret @@ -420,8 +428,8 @@ mp_sub610_p4_asm: // 640-bit integer multiplication using Karatsuba (two levels), Comba (lower level) // Operation: c [x2] = a [x0] * b [x1] //*********************************************************************************** -.global mul610_asm -mul610_asm: +.global fmt(mul610_asm) +fmt(mul610_asm): sub sp, sp, #96 ldp x3, x4, [x0] ldp x5, x6, [x0,#16] @@ -650,8 +658,8 @@ mul610_asm: // Operation: mc [x1] = ma [x0] // NOTE: ma=mc is not allowed //************************************************************************************** -.global rdc610_asm -rdc610_asm: +.global fmt(rdc610_asm) +fmt(rdc610_asm): sub sp, sp, #96 stp x19, x20, [sp] stp x21, x22, [sp,#16] @@ -863,8 +871,8 @@ rdc610_asm: // 610-bit multiprecision addition // Operation: c [x2] = a [x0] + b [x1] //*********************************************************************** -.global mp_add610_asm -mp_add610_asm: +.global fmt(mp_add610_asm) +fmt(mp_add610_asm): ldp x3, x4, [x0,#0] ldp x11, x12, [x1,#0] ldp x5, x6, [x0,#16] @@ -898,8 +906,8 @@ mp_add610_asm: // 2x610-bit multiprecision addition // Operation: c [x2] = a [x0] + b [x1] //*********************************************************************** -.global mp_add610x2_asm -mp_add610x2_asm: +.global fmt(mp_add610x2_asm) +fmt(mp_add610x2_asm): ldp x3, x4, [x0,#0] ldp x5, x6, [x0,#16] ldp x7, x8, [x0,#32] @@ -962,8 +970,8 @@ mp_add610x2_asm: // 2x610-bit multiprecision subtraction/addition // Operation: c [x2] = a [x0] - b [x1]. If c < 0, add p610*2^640 //*********************************************************************** -.global mp_subadd610x2_asm -mp_subadd610x2_asm: +.global fmt(mp_subadd610x2_asm) +fmt(mp_subadd610x2_asm): ldp x3, x4, [x0,#0] ldp x5, x6, [x0,#16] ldp x11, x12, [x1,#0] @@ -1049,8 +1057,8 @@ mp_subadd610x2_asm: // Double 2x610-bit multiprecision subtraction // Operation: c [x2] = c [x2] - a [x0] - b [x1] //*********************************************************************** -.global mp_dblsub610x2_asm -mp_dblsub610x2_asm: +.global fmt(mp_dblsub610x2_asm) +fmt(mp_dblsub610x2_asm): sub sp, sp, #64 stp x19, x20, [sp] stp x21, x22, [sp, #16] @@ -1146,4 +1154,4 @@ mp_dblsub610x2_asm: ldp x25, x26, [sp, #32] ldp x27, x28, [sp, #48] add sp, sp, #64 - ret \ No newline at end of file + ret diff --git a/src/P751/ARM64/fp_arm64_asm.S b/src/P751/ARM64/fp_arm64_asm.S index fff0d88..216467e 100644 --- a/src/P751/ARM64/fp_arm64_asm.S +++ b/src/P751/ARM64/fp_arm64_asm.S @@ -4,7 +4,15 @@ // Abstract: field arithmetic in 64-bit ARMv8 assembly for P751 on Linux //******************************************************************************************* +// Format function and variable names for Mac OS X +#if defined(__APPLE__) + #define fmt(f) _##f +#else + #define fmt(f) f +#endif + .text +.align 2 // p751 p751: @@ -55,8 +63,8 @@ p751p1_nz_s32: // Field addition // Operation: c [x2] = a [x0] + b [x1] //*********************************************************************** -.global fpadd751_asm -fpadd751_asm: +.global fmt(fpadd751_asm) +fmt(fpadd751_asm): sub sp, sp, #16 stp x19, x20, [sp] @@ -153,8 +161,8 @@ fpadd751_asm: // Field subtraction // Operation: c [x2] = a [x0] - b [x1] //*********************************************************************** -.global fpsub751_asm -fpsub751_asm: +.global fmt(fpsub751_asm) +fmt(fpsub751_asm): sub sp, sp, #16 str x19, [sp] @@ -297,8 +305,8 @@ fpsub751_asm: // Multiprecision subtraction with correction with 2*p751 // Operation: c [reg_p3] = a [reg_p1] - b [reg_p2] + 2*p751 //*********************************************************************** -.global mp_sub751_p2_asm -mp_sub751_p2_asm: +.global fmt(mp_sub751_p2_asm) +fmt(mp_sub751_p2_asm): SUB751_PX p751x2 ret @@ -308,8 +316,8 @@ mp_sub751_p2_asm: // Multiprecision subtraction with correction with 4*p751 // Operation: c [reg_p3] = a [reg_p1] - b [reg_p2] + 4*p751 //*********************************************************************** -.global mp_sub751_p4_asm -mp_sub751_p4_asm: +.global fmt(mp_sub751_p4_asm) +fmt(mp_sub751_p4_asm): SUB751_PX p751x4 ret @@ -570,8 +578,8 @@ mp_sub751_p4_asm: // 768-bit integer multiplication using Karatsuba (two levels), Comba (lower level) // Operation: c [x2] = a [x0] * b [x1] //*********************************************************************************** -.global mul751_asm -mul751_asm: +.global fmt(mul751_asm) +fmt(mul751_asm): sub sp, sp, #96 stp x19, x20, [sp,#0] stp x21, x22, [sp,#16] @@ -925,8 +933,8 @@ mul751_asm: // Operation: mc [x1] = ma [x0] // NOTE: ma=mc is not allowed //************************************************************************************** -.global rdc751_asm -rdc751_asm: +.global fmt(rdc751_asm) +fmt(rdc751_asm): sub sp, sp, #96 stp x19, x20, [sp] stp x21, x22, [sp,#16] @@ -1117,8 +1125,8 @@ rdc751_asm: // 751-bit multiprecision addition // Operation: c [x2] = a [x0] + b [x1] //*********************************************************************** -.global mp_add751_asm -mp_add751_asm: +.global fmt(mp_add751_asm) +fmt(mp_add751_asm): ldp x3, x4, [x0,#0] ldp x5, x6, [x0,#16] ldp x7, x8, [x0,#32] @@ -1158,8 +1166,8 @@ mp_add751_asm: // 2x751-bit multiprecision addition // Operation: c [x2] = a [x0] + b [x1] //*********************************************************************** -.global mp_add751x2_asm -mp_add751x2_asm: +.global fmt(mp_add751x2_asm) +fmt(mp_add751x2_asm): ldp x3, x4, [x0,#0] ldp x5, x6, [x0,#16] ldp x7, x8, [x0,#32] @@ -1232,8 +1240,8 @@ mp_add751x2_asm: // 2x751-bit multiprecision subtraction/addition // Operation: c [x2] = a [x0] - b [x1]. If c < 0, add p751*2^768 //*********************************************************************** -.global mp_subadd751x2_asm -mp_subadd751x2_asm: +.global fmt(mp_subadd751x2_asm) +fmt(mp_subadd751x2_asm): sub sp, sp, #16 stp x19, x20, [sp] @@ -1340,8 +1348,8 @@ mp_subadd751x2_asm: // Double 2x751-bit multiprecision subtraction // Operation: c [x2] = c [x2] - a [x0] - b [x1] //*********************************************************************** -.global mp_dblsub751x2_asm -mp_dblsub751x2_asm: +.global fmt(mp_dblsub751x2_asm) +fmt(mp_dblsub751x2_asm): sub sp, sp, #96 stp x19, x20, [sp] stp x21, x22, [sp, #16]