Skip to content

Commit

Permalink
Merge pull request #48 from christianpaquin/fix-macos-arm-naming
Browse files Browse the repository at this point in the history
Fix assembly symbol exports on macOS.
  • Loading branch information
patricklonga authored Aug 25, 2021
2 parents 28b4b5d + 8376100 commit d2a5832
Show file tree
Hide file tree
Showing 4 changed files with 114 additions and 82 deletions.
50 changes: 29 additions & 21 deletions src/P434/ARM64/fp_arm64_asm.S
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,15 @@
// Abstract: field arithmetic in 64-bit ARMv8 assembly for P434 on Linux and macOS
//*******************************************************************************************

// Symbol-name decoration for exported labels.
// When __APPLE__ is defined, fmt(name) pastes a leading underscore onto the
// name (the Mach-O spelling of C symbols); otherwise fmt(name) is the name unchanged.
#ifdef __APPLE__
#define fmt(name) _##name
#else
#define fmt(name) name
#endif

.text
.align 2

// p434
p434:
Expand Down Expand Up @@ -43,8 +51,8 @@ p434p1_nz:
// Field addition
// Operation: c [x2] = a [x0] + b [x1]
//***********************************************************************
.global fpadd434_asm
fpadd434_asm:
.global fmt(fpadd434_asm)
fmt(fpadd434_asm):

// Add a + b
ldp x3, x4, [x0,#0]
Expand Down Expand Up @@ -105,8 +113,8 @@ fpadd434_asm:
// Field subtraction
// Operation: c [x2] = a [x0] - b [x1]
//***********************************************************************
.global fpsub434_asm
fpsub434_asm:
.global fmt(fpsub434_asm)
fmt(fpsub434_asm):

// Subtract a - b
ldp x3, x4, [x0,#0]
Expand Down Expand Up @@ -198,8 +206,8 @@ fpsub434_asm:
// Multiprecision subtraction with correction with 2*p434
// Operation: c [reg_p3] = a [reg_p1] - b [reg_p2] + 2*p434
//***********************************************************************
// NOTE(review): the next two lines look like the pre-change side of this diff
// (unprefixed symbol) shown next to its replacement; confirm that only the
// fmt()-wrapped pair below exists in the actual file.
.global mp_sub434_p2_asm
mp_sub434_p2_asm:
// Export through fmt() so the symbol gets a leading underscore when __APPLE__ is defined.
.global fmt(mp_sub434_p2_asm)
fmt(mp_sub434_p2_asm):

// The whole body is one expansion of SUB434_PX with p434x2 as its argument.
// SUB434_PX is defined outside this view; presumably it implements the
// a - b + 2*p434 operation described in the header -- TODO confirm.
SUB434_PX p434x2
ret
Expand All @@ -209,8 +217,8 @@ mp_sub434_p2_asm:
// Multiprecision subtraction with correction with 4*p434
// Operation: c [reg_p3] = a [reg_p1] - b [reg_p2] + 4*p434
//***********************************************************************
// NOTE(review): the next two lines look like the pre-change side of this diff
// (unprefixed symbol) shown next to its replacement; confirm that only the
// fmt()-wrapped pair below exists in the actual file.
.global mp_sub434_p4_asm
mp_sub434_p4_asm:
// Export through fmt() so the symbol gets a leading underscore when __APPLE__ is defined.
.global fmt(mp_sub434_p4_asm)
fmt(mp_sub434_p4_asm):

// The whole body is one expansion of SUB434_PX with p434x4 as its argument.
// SUB434_PX is defined outside this view; presumably it implements the
// a - b + 4*p434 operation described in the header -- TODO confirm.
SUB434_PX p434x4
ret
Expand Down Expand Up @@ -361,8 +369,8 @@ mp_sub434_p4_asm:
// 448-bit integer multiplication using Karatsuba (two levels), Comba (lower level)
// Operation: c [x2] = a [x0] * b [x1]
//***********************************************************************************
.global mul434_asm
mul434_asm:
.global fmt(mul434_asm)
fmt(mul434_asm):
sub sp, sp, #96
ldp x3, x4, [x0]
ldp x7, x8, [x0,#32]
Expand Down Expand Up @@ -552,8 +560,8 @@ mul434_asm:
// Operation: mc [x1] = ma [x0]
// NOTE: ma=mc is not allowed
//**************************************************************************************
.global rdc434_asm
rdc434_asm:
.global fmt(rdc434_asm)
fmt(rdc434_asm):
sub sp, sp, #80
stp x21, x24, [sp,#16]
stp x25, x26, [sp,#32]
Expand Down Expand Up @@ -653,8 +661,8 @@ rdc434_asm:
// 434-bit multiprecision addition
// Operation: c [x2] = a [x0] + b [x1]
//***********************************************************************
.global mp_add434_asm
mp_add434_asm:
.global fmt(mp_add434_asm)
fmt(mp_add434_asm):
ldp x3, x4, [x0,#0]
ldp x11, x12, [x1,#0]
ldp x5, x6, [x0,#16]
Expand Down Expand Up @@ -682,8 +690,8 @@ mp_add434_asm:
// 2x434-bit multiprecision addition
// Operation: c [x2] = a [x0] + b [x1]
//***********************************************************************
.global mp_add434x2_asm
mp_add434x2_asm:
.global fmt(mp_add434x2_asm)
fmt(mp_add434x2_asm):
ldp x3, x4, [x0,#0]
ldp x11, x12, [x1,#0]
ldp x5, x6, [x0,#16]
Expand Down Expand Up @@ -729,8 +737,8 @@ mp_add434x2_asm:
// 2x434-bit multiprecision subtraction/addition
// Operation: c [x2] = a [x0] - b [x1]. If c < 0, add p434*2^448
//***********************************************************************
.global mp_subadd434x2_asm
mp_subadd434x2_asm:
.global fmt(mp_subadd434x2_asm)
fmt(mp_subadd434x2_asm):
ldp x3, x4, [x0,#0]
ldp x11, x12, [x1,#0]
ldp x5, x6, [x0,#16]
Expand Down Expand Up @@ -797,8 +805,8 @@ mp_subadd434x2_asm:
// Double 2x434-bit multiprecision subtraction
// Operation: c [x2] = c [x2] - a [x0] - b [x1]
//***********************************************************************
.global mp_dblsub434x2_asm
mp_dblsub434x2_asm:
.global fmt(mp_dblsub434x2_asm)
fmt(mp_dblsub434x2_asm):
sub sp, sp, #32
stp x27, x28, [sp, #0]
stp x29, x30, [sp, #16]
Expand Down Expand Up @@ -863,4 +871,4 @@ mp_dblsub434x2_asm:
stp x15, x16, [x2,#96]
ldp x29, x30, [sp, #16]
add sp, sp, #32
ret
ret
48 changes: 28 additions & 20 deletions src/P503/ARM64/fp_arm64_asm.S
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,15 @@
// Abstract: field arithmetic in 64-bit ARMv8 assembly for P503 on Linux and macOS
//*******************************************************************************************

// Symbol-name decoration for exported labels.
// When __APPLE__ is defined, fmt(name) pastes a leading underscore onto the
// name (the Mach-O spelling of C symbols); otherwise fmt(name) is the name unchanged.
#ifdef __APPLE__
#define fmt(name) _##name
#else
#define fmt(name) name
#endif

.text
.align 2

// p503
p503:
Expand Down Expand Up @@ -46,8 +54,8 @@ p503p1_nz_s8:
// Field addition
// Operation: c [x2] = a [x0] + b [x1]
//***********************************************************************
.global fpadd503_asm
fpadd503_asm:
.global fmt(fpadd503_asm)
fmt(fpadd503_asm):
ldp x3, x4, [x0,#0]
ldp x11, x12, [x1,#0]
ldp x5, x6, [x0,#16]
Expand Down Expand Up @@ -113,8 +121,8 @@ fpadd503_asm:
// Field subtraction
// Operation: c [x2] = a [x0] - b [x1]
//***********************************************************************
.global fpsub503_asm
fpsub503_asm:
.global fmt(fpsub503_asm)
fmt(fpsub503_asm):
ldp x3, x4, [x0,#0]
ldp x11, x12, [x1,#0]
ldp x5, x6, [x0,#16]
Expand Down Expand Up @@ -213,8 +221,8 @@ fpsub503_asm:
// Multiprecision subtraction with correction with 2*p503
// Operation: c [reg_p3] = a [reg_p1] - b [reg_p2] + 2*p503
//***********************************************************************
// NOTE(review): the next two lines look like the pre-change side of this diff
// (unprefixed symbol) shown next to its replacement; confirm that only the
// fmt()-wrapped pair below exists in the actual file.
.global mp_sub503_p2_asm
mp_sub503_p2_asm:
// Export through fmt() so the symbol gets a leading underscore when __APPLE__ is defined.
.global fmt(mp_sub503_p2_asm)
fmt(mp_sub503_p2_asm):

// The whole body is one expansion of SUB503_PX with p503x2 as its argument.
// SUB503_PX is defined outside this view; presumably it implements the
// a - b + 2*p503 operation described in the header -- TODO confirm.
SUB503_PX p503x2
ret
Expand All @@ -224,8 +232,8 @@ mp_sub503_p2_asm:
// Multiprecision subtraction with correction with 4*p503
// Operation: c [reg_p3] = a [reg_p1] - b [reg_p2] + 4*p503
//***********************************************************************
// NOTE(review): the next two lines look like the pre-change side of this diff
// (unprefixed symbol) shown next to its replacement; confirm that only the
// fmt()-wrapped pair below exists in the actual file.
.global mp_sub503_p4_asm
mp_sub503_p4_asm:
// Export through fmt() so the symbol gets a leading underscore when __APPLE__ is defined.
.global fmt(mp_sub503_p4_asm)
fmt(mp_sub503_p4_asm):

// The whole body is one expansion of SUB503_PX with p503x4 as its argument.
// SUB503_PX is defined outside this view; presumably it implements the
// a - b + 4*p503 operation described in the header -- TODO confirm.
SUB503_PX p503x4
ret
Expand Down Expand Up @@ -332,8 +340,8 @@ mp_sub503_p4_asm:
// 512-bit integer multiplication using Karatsuba (two levels), Comba (lower level)
// Operation: c [x2] = a [x0] * b [x1]
//***********************************************************************************
.global mul503_asm
mul503_asm:
.global fmt(mul503_asm)
fmt(mul503_asm):
sub sp, sp, #96
ldp x3, x4, [x0]
ldp x5, x6, [x0,#16]
Expand Down Expand Up @@ -508,8 +516,8 @@ mul503_asm:
// Operation: mc [x1] = ma [x0]
// NOTE: ma=mc is not allowed
//**************************************************************************************
.global rdc503_asm
rdc503_asm:
.global fmt(rdc503_asm)
fmt(rdc503_asm):
sub sp, sp, #96
stp x23, x24, [sp, #32]
stp x25, x26, [sp, #48]
Expand Down Expand Up @@ -673,8 +681,8 @@ rdc503_asm:
// 503-bit multiprecision addition
// Operation: c [x2] = a [x0] + b [x1]
//***********************************************************************
.global mp_add503_asm
mp_add503_asm:
.global fmt(mp_add503_asm)
fmt(mp_add503_asm):
ldp x3, x4, [x0,#0]
ldp x11, x12, [x1,#0]
ldp x5, x6, [x0,#16]
Expand Down Expand Up @@ -703,8 +711,8 @@ mp_add503_asm:
// 2x503-bit multiprecision addition
// Operation: c [x2] = a [x0] + b [x1]
//***********************************************************************
.global mp_add503x2_asm
mp_add503x2_asm:
.global fmt(mp_add503x2_asm)
fmt(mp_add503x2_asm):
ldp x3, x4, [x0,#0]
ldp x11, x12, [x1,#0]
ldp x5, x6, [x0,#16]
Expand Down Expand Up @@ -755,8 +763,8 @@ mp_add503x2_asm:
// 2x503-bit multiprecision subtraction/addition
// Operation: c [x2] = a [x0] - b [x1]. If c < 0, add p503*2^512
//***********************************************************************
.global mp_subadd503x2_asm
mp_subadd503x2_asm:
.global fmt(mp_subadd503x2_asm)
fmt(mp_subadd503x2_asm):
ldp x3, x4, [x0,#0]
ldp x11, x12, [x1,#0]
ldp x5, x6, [x0,#16]
Expand Down Expand Up @@ -828,8 +836,8 @@ mp_subadd503x2_asm:
// Double 2x503-bit multiprecision subtraction
// Operation: c [x2] = c [x2] - a [x0] - b [x1]
//***********************************************************************
.global mp_dblsub503x2_asm
mp_dblsub503x2_asm:
.global fmt(mp_dblsub503x2_asm)
fmt(mp_dblsub503x2_asm):
sub sp, sp, #32
stp x27, x28, [sp, #0]
stp x29, x30, [sp, #16]
Expand Down
50 changes: 29 additions & 21 deletions src/P610/ARM64/fp_arm64_asm.S
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,15 @@
// Abstract: field arithmetic in 64-bit ARMv8 assembly for P610 on Linux and macOS
//*******************************************************************************************

// Symbol-name decoration for exported labels.
// When __APPLE__ is defined, fmt(name) pastes a leading underscore onto the
// name (the Mach-O spelling of C symbols); otherwise fmt(name) is the name unchanged.
#ifdef __APPLE__
#define fmt(name) _##name
#else
#define fmt(name) name
#endif

.text
.align 2

// p610
p610:
Expand Down Expand Up @@ -51,8 +59,8 @@ p610p1_nz_s6:
// Field addition
// Operation: c [x2] = a [x0] + b [x1]
//***********************************************************************
.global fpadd610_asm
fpadd610_asm:
.global fmt(fpadd610_asm)
fmt(fpadd610_asm):
// Add a + b
ldp x3, x4, [x0,#0]
ldp x5, x6, [x0,#16]
Expand Down Expand Up @@ -131,8 +139,8 @@ fpadd610_asm:
// Field subtraction
// Operation: c [x2] = a [x0] - b [x1]
//***********************************************************************
.global fpsub610_asm
fpsub610_asm:
.global fmt(fpsub610_asm)
fmt(fpsub610_asm):
// Subtract a - b
ldp x3, x4, [x0,#0]
ldp x5, x6, [x0,#16]
Expand Down Expand Up @@ -248,8 +256,8 @@ fpsub610_asm:
// Multiprecision subtraction with correction with 2*p610
// Operation: c [reg_p3] = a [reg_p1] - b [reg_p2] + 2*p610
//***********************************************************************
// NOTE(review): the next two lines look like the pre-change side of this diff
// (unprefixed symbol) shown next to its replacement; confirm that only the
// fmt()-wrapped pair below exists in the actual file.
.global mp_sub610_p2_asm
mp_sub610_p2_asm:
// Export through fmt() so the symbol gets a leading underscore when __APPLE__ is defined.
.global fmt(mp_sub610_p2_asm)
fmt(mp_sub610_p2_asm):

// The whole body is one expansion of SUB610_PX with p610x2 as its argument.
// SUB610_PX is defined outside this view; presumably it implements the
// a - b + 2*p610 operation described in the header -- TODO confirm.
SUB610_PX p610x2
ret
Expand All @@ -259,8 +267,8 @@ mp_sub610_p2_asm:
// Multiprecision subtraction with correction with 4*p610
// Operation: c [reg_p3] = a [reg_p1] - b [reg_p2] + 4*p610
//***********************************************************************
// NOTE(review): the next two lines look like the pre-change side of this diff
// (unprefixed symbol) shown next to its replacement; confirm that only the
// fmt()-wrapped pair below exists in the actual file.
.global mp_sub610_p4_asm
mp_sub610_p4_asm:
// Export through fmt() so the symbol gets a leading underscore when __APPLE__ is defined.
.global fmt(mp_sub610_p4_asm)
fmt(mp_sub610_p4_asm):

// The whole body is one expansion of SUB610_PX with p610x4 as its argument.
// SUB610_PX is defined outside this view; presumably it implements the
// a - b + 4*p610 operation described in the header -- TODO confirm.
SUB610_PX p610x4
ret
Expand Down Expand Up @@ -420,8 +428,8 @@ mp_sub610_p4_asm:
// 640-bit integer multiplication using Karatsuba (two levels), Comba (lower level)
// Operation: c [x2] = a [x0] * b [x1]
//***********************************************************************************
.global mul610_asm
mul610_asm:
.global fmt(mul610_asm)
fmt(mul610_asm):
sub sp, sp, #96
ldp x3, x4, [x0]
ldp x5, x6, [x0,#16]
Expand Down Expand Up @@ -650,8 +658,8 @@ mul610_asm:
// Operation: mc [x1] = ma [x0]
// NOTE: ma=mc is not allowed
//**************************************************************************************
.global rdc610_asm
rdc610_asm:
.global fmt(rdc610_asm)
fmt(rdc610_asm):
sub sp, sp, #96
stp x19, x20, [sp]
stp x21, x22, [sp,#16]
Expand Down Expand Up @@ -863,8 +871,8 @@ rdc610_asm:
// 610-bit multiprecision addition
// Operation: c [x2] = a [x0] + b [x1]
//***********************************************************************
.global mp_add610_asm
mp_add610_asm:
.global fmt(mp_add610_asm)
fmt(mp_add610_asm):
ldp x3, x4, [x0,#0]
ldp x11, x12, [x1,#0]
ldp x5, x6, [x0,#16]
Expand Down Expand Up @@ -898,8 +906,8 @@ mp_add610_asm:
// 2x610-bit multiprecision addition
// Operation: c [x2] = a [x0] + b [x1]
//***********************************************************************
.global mp_add610x2_asm
mp_add610x2_asm:
.global fmt(mp_add610x2_asm)
fmt(mp_add610x2_asm):
ldp x3, x4, [x0,#0]
ldp x5, x6, [x0,#16]
ldp x7, x8, [x0,#32]
Expand Down Expand Up @@ -962,8 +970,8 @@ mp_add610x2_asm:
// 2x610-bit multiprecision subtraction/addition
// Operation: c [x2] = a [x0] - b [x1]. If c < 0, add p610*2^640
//***********************************************************************
.global mp_subadd610x2_asm
mp_subadd610x2_asm:
.global fmt(mp_subadd610x2_asm)
fmt(mp_subadd610x2_asm):
ldp x3, x4, [x0,#0]
ldp x5, x6, [x0,#16]
ldp x11, x12, [x1,#0]
Expand Down Expand Up @@ -1049,8 +1057,8 @@ mp_subadd610x2_asm:
// Double 2x610-bit multiprecision subtraction
// Operation: c [x2] = c [x2] - a [x0] - b [x1]
//***********************************************************************
.global mp_dblsub610x2_asm
mp_dblsub610x2_asm:
.global fmt(mp_dblsub610x2_asm)
fmt(mp_dblsub610x2_asm):
sub sp, sp, #64
stp x19, x20, [sp]
stp x21, x22, [sp, #16]
Expand Down Expand Up @@ -1146,4 +1154,4 @@ mp_dblsub610x2_asm:
ldp x25, x26, [sp, #32]
ldp x27, x28, [sp, #48]
add sp, sp, #64
ret
ret
Loading

0 comments on commit d2a5832

Please sign in to comment.