Edit README, update inline keyword

microsoft · Aug 25, 2021 · effa607 · effa607
1 parent 5ee6d81
commit effa607
Show file tree

Hide file tree

Showing 12 changed files with 74 additions and 74 deletions.
diff --git a/README.md b/README.md
@@ -102,11 +102,11 @@ See instructions below to choose an implementation option and compile on one of
  The following implementation options are available:
 - Portable implementations enabled by setting `OPT_LEVEL=GENERIC`. 
 - Optimized x64 assembly implementations for Linux\Mac OS X enabled by setting `ARCH=x64` and `OPT_LEVEL=FAST`.
-- Optimized ARMv8 assembly implementation for Linux enabled by setting `ARCH=ARM64` and `OPT_LEVEL=FAST`.
+- Optimized ARMv8 assembly implementation for Linux\Mac OS X enabled by setting `ARCH=ARM64` (or `ARCH=M1` for Apple M1 SoC) and `OPT_LEVEL=FAST`.
 
 Follow the instructions in the sections "_Instructions for Linux\Mac OS X_" or "_Instructions for Windows_" below to configure these different implementation options.
 
-## Instructions for Linux
+## Instructions for Linux\Mac OS X
 
 By simply executing:
 
@@ -130,16 +130,16 @@ is supported on Haswell, and both MULX and ADX are supported on Broadwell, Skyla
 Note that USE_ADX can only be set to `TRUE` if `USE_MULX=TRUE`.
 The option `USE_MULX=FALSE` with `USE_ADX=FALSE` is only supported on p503 and p751.
 
-Options for x86/ARM/s390x:
+Options for x86/ARM/M1/s390x:
 
 ```sh
-$ make ARCH=[x86/ARM/s390x] CC=[gcc/clang]
+$ make ARCH=[x86/ARM/M1/s390x] CC=[gcc/clang]
 ```
 
-Options for ARM64:
+Options for ARM64 or Apple M1:
 
 ```sh
-$ make ARCH=[ARM64] CC=[gcc/clang] OPT_LEVEL=[FAST/GENERIC]
+$ make ARCH=[ARM64/M1] CC=[gcc/clang] OPT_LEVEL=[FAST/GENERIC]
 ```
 
 As in the x64 case, `OPT_LEVEL=FAST` enables the use of assembly optimizations on ARMv8 platforms.

diff --git a/src/P434/AMD64/fp_x64.c b/src/P434/AMD64/fp_x64.c
@@ -15,7 +15,7 @@ extern const uint64_t p434x2[NWORDS_FIELD];
 extern const uint64_t p434x4[NWORDS_FIELD];
 
 
-__inline void mp_sub434_p2(const digit_t* a, const digit_t* b, digit_t* c)
+inline void mp_sub434_p2(const digit_t* a, const digit_t* b, digit_t* c)
 { // Multiprecision subtraction with correction with 2*p, c = a-b+2p.    
 #if (OS_TARGET == OS_WIN) || defined(GENERIC_IMPLEMENTATION) || (TARGET == TARGET_ARM) || (TARGET == TARGET_ARM64 && NBITS_FIELD == 610)
     unsigned int i, borrow = 0;
@@ -37,7 +37,7 @@ __inline void mp_sub434_p2(const digit_t* a, const digit_t* b, digit_t* c)
 } 
 
 
-__inline void mp_sub434_p4(const digit_t* a, const digit_t* b, digit_t* c)
+inline void mp_sub434_p4(const digit_t* a, const digit_t* b, digit_t* c)
 { // Multiprecision subtraction with correction with 4*p, c = a-b+4p.    
 #if (OS_TARGET == OS_WIN) || defined(GENERIC_IMPLEMENTATION) || (TARGET == TARGET_ARM) || (TARGET == TARGET_ARM64 && NBITS_FIELD == 610)
     unsigned int i, borrow = 0;
@@ -59,7 +59,7 @@ __inline void mp_sub434_p4(const digit_t* a, const digit_t* b, digit_t* c)
 }
 
 
-__inline void fpadd434(const digit_t* a, const digit_t* b, digit_t* c)
+inline void fpadd434(const digit_t* a, const digit_t* b, digit_t* c)
 { // Modular addition, c = a+b mod p434.
   // Inputs: a, b in [0, 2*p434-1] 
   // Output: c in [0, 2*p434-1] 
@@ -91,7 +91,7 @@ __inline void fpadd434(const digit_t* a, const digit_t* b, digit_t* c)
 } 
 
 
-__inline void fpsub434(const digit_t* a, const digit_t* b, digit_t* c)
+inline void fpsub434(const digit_t* a, const digit_t* b, digit_t* c)
 { // Modular subtraction, c = a-b mod p434.
   // Inputs: a, b in [0, 2*p434-1] 
   // Output: c in [0, 2*p434-1] 
@@ -118,7 +118,7 @@ __inline void fpsub434(const digit_t* a, const digit_t* b, digit_t* c)
 }
 
 
-__inline void fpneg434(digit_t* a)
+inline void fpneg434(digit_t* a)
 { // Modular negation, a = -a mod p434.
   // Input/output: a in [0, 2*p434-1] 
     unsigned int i, borrow = 0;

diff --git a/src/P434/generic/fp_generic.c b/src/P434/generic/fp_generic.c
@@ -15,7 +15,7 @@ extern const uint64_t p434x2[NWORDS64_FIELD];
 extern const uint64_t p434x4[NWORDS64_FIELD];
 
 
-__inline void mp_sub434_p2(const digit_t* a, const digit_t* b, digit_t* c)
+inline void mp_sub434_p2(const digit_t* a, const digit_t* b, digit_t* c)
 { // Multiprecision subtraction with correction with 2*p, c = a-b+2p. 
     unsigned int i, borrow = 0;
 
@@ -30,7 +30,7 @@ __inline void mp_sub434_p2(const digit_t* a, const digit_t* b, digit_t* c)
 } 
 
 
-__inline void mp_sub434_p4(const digit_t* a, const digit_t* b, digit_t* c)
+inline void mp_sub434_p4(const digit_t* a, const digit_t* b, digit_t* c)
 { // Multiprecision subtraction with correction with 4*p, c = a-b+4p. 
     unsigned int i, borrow = 0;
 
@@ -45,7 +45,7 @@ __inline void mp_sub434_p4(const digit_t* a, const digit_t* b, digit_t* c)
 } 
 
 
-__inline void fpadd434(const digit_t* a, const digit_t* b, digit_t* c)
+inline void fpadd434(const digit_t* a, const digit_t* b, digit_t* c)
 { // Modular addition, c = a+b mod p434.
   // Inputs: a, b in [0, 2*p434-1] 
   // Output: c in [0, 2*p434-1] 
@@ -69,7 +69,7 @@ __inline void fpadd434(const digit_t* a, const digit_t* b, digit_t* c)
 } 
 
 
-__inline void fpsub434(const digit_t* a, const digit_t* b, digit_t* c)
+inline void fpsub434(const digit_t* a, const digit_t* b, digit_t* c)
 { // Modular subtraction, c = a-b mod p434.
   // Inputs: a, b in [0, 2*p434-1] 
   // Output: c in [0, 2*p434-1] 
@@ -88,7 +88,7 @@ __inline void fpsub434(const digit_t* a, const digit_t* b, digit_t* c)
 }
 
 
-__inline void fpneg434(digit_t* a)
+inline void fpneg434(digit_t* a)
 { // Modular negation, a = -a mod p434.
   // Input/output: a in [0, 2*p434-1] 
     unsigned int i, borrow = 0;

diff --git a/src/P503/AMD64/fp_x64.c b/src/P503/AMD64/fp_x64.c
@@ -15,7 +15,7 @@ extern const uint64_t p503x2[NWORDS_FIELD];
 extern const uint64_t p503x4[NWORDS_FIELD];
 
 
-__inline void mp_sub503_p2(const digit_t* a, const digit_t* b, digit_t* c)
+inline void mp_sub503_p2(const digit_t* a, const digit_t* b, digit_t* c)
 { // Multiprecision subtraction with correction with 2*p, c = a-b+2p.    
 #if (OS_TARGET == OS_WIN) || defined(GENERIC_IMPLEMENTATION) || (TARGET == TARGET_ARM) || (TARGET == TARGET_ARM64 && NBITS_FIELD == 610)
     unsigned int i, borrow = 0;
@@ -37,7 +37,7 @@ __inline void mp_sub503_p2(const digit_t* a, const digit_t* b, digit_t* c)
 } 
 
 
-__inline void mp_sub503_p4(const digit_t* a, const digit_t* b, digit_t* c)
+inline void mp_sub503_p4(const digit_t* a, const digit_t* b, digit_t* c)
 { // Multiprecision subtraction with correction with 4*p, c = a-b+4p.    
 #if (OS_TARGET == OS_WIN) || defined(GENERIC_IMPLEMENTATION) || (TARGET == TARGET_ARM) || (TARGET == TARGET_ARM64 && NBITS_FIELD == 610)
     unsigned int i, borrow = 0;
@@ -59,7 +59,7 @@ __inline void mp_sub503_p4(const digit_t* a, const digit_t* b, digit_t* c)
 } 
 
 
-__inline void fpadd503(const digit_t* a, const digit_t* b, digit_t* c)
+inline void fpadd503(const digit_t* a, const digit_t* b, digit_t* c)
 { // Modular addition, c = a+b mod p503.
   // Inputs: a, b in [0, 2*p503-1] 
   // Output: c in [0, 2*p503-1] 
@@ -91,7 +91,7 @@ __inline void fpadd503(const digit_t* a, const digit_t* b, digit_t* c)
 } 
 
 
-__inline void fpsub503(const digit_t* a, const digit_t* b, digit_t* c)
+inline void fpsub503(const digit_t* a, const digit_t* b, digit_t* c)
 { // Modular subtraction, c = a-b mod p503.
   // Inputs: a, b in [0, 2*p503-1] 
   // Output: c in [0, 2*p503-1] 
@@ -118,7 +118,7 @@ __inline void fpsub503(const digit_t* a, const digit_t* b, digit_t* c)
 }
 
 
-__inline void fpneg503(digit_t* a)
+inline void fpneg503(digit_t* a)
 { // Modular negation, a = -a mod p503.
   // Input/output: a in [0, 2*p503-1] 
     unsigned int i, borrow = 0;

diff --git a/src/P503/generic/fp_generic.c b/src/P503/generic/fp_generic.c
@@ -15,7 +15,7 @@ extern const uint64_t p503x2[NWORDS64_FIELD];
 extern const uint64_t p503x4[NWORDS64_FIELD];
 
 
-__inline void mp_sub503_p2(const digit_t* a, const digit_t* b, digit_t* c)
+inline void mp_sub503_p2(const digit_t* a, const digit_t* b, digit_t* c)
 { // Multiprecision subtraction with correction with 2*p, c = a-b+2p.
     unsigned int i, borrow = 0;
 
@@ -30,7 +30,7 @@ __inline void mp_sub503_p2(const digit_t* a, const digit_t* b, digit_t* c)
 } 
 
 
-__inline void mp_sub503_p4(const digit_t* a, const digit_t* b, digit_t* c)
+inline void mp_sub503_p4(const digit_t* a, const digit_t* b, digit_t* c)
 { // Multiprecision subtraction with correction with 4*p, c = a-b+4p.
     unsigned int i, borrow = 0;
 
@@ -45,7 +45,7 @@ __inline void mp_sub503_p4(const digit_t* a, const digit_t* b, digit_t* c)
 } 
 
 
-__inline void fpadd503(const digit_t* a, const digit_t* b, digit_t* c)
+inline void fpadd503(const digit_t* a, const digit_t* b, digit_t* c)
 { // Modular addition, c = a+b mod p503.
   // Inputs: a, b in [0, 2*p503-1] 
   // Output: c in [0, 2*p503-1] 
@@ -69,7 +69,7 @@ __inline void fpadd503(const digit_t* a, const digit_t* b, digit_t* c)
 } 
 
 
-__inline void fpsub503(const digit_t* a, const digit_t* b, digit_t* c)
+inline void fpsub503(const digit_t* a, const digit_t* b, digit_t* c)
 { // Modular subtraction, c = a-b mod p503.
   // Inputs: a, b in [0, 2*p503-1] 
   // Output: c in [0, 2*p503-1] 
@@ -88,7 +88,7 @@ __inline void fpsub503(const digit_t* a, const digit_t* b, digit_t* c)
 }
 
 
-__inline void fpneg503(digit_t* a)
+inline void fpneg503(digit_t* a)
 { // Modular negation, a = -a mod p503.
   // Input/output: a in [0, 2*p503-1] 
     unsigned int i, borrow = 0;

diff --git a/src/P610/AMD64/fp_x64.c b/src/P610/AMD64/fp_x64.c
@@ -15,7 +15,7 @@ extern const uint64_t p610x2[NWORDS_FIELD];
 extern const uint64_t p610x4[NWORDS_FIELD];
 
 
-__inline void mp_sub610_p2(const digit_t* a, const digit_t* b, digit_t* c)
+inline void mp_sub610_p2(const digit_t* a, const digit_t* b, digit_t* c)
 { // Multiprecision subtraction with correction with 2*p, c = a-b+2p.    
 #if (OS_TARGET == OS_WIN) || defined(GENERIC_IMPLEMENTATION) || (TARGET == TARGET_ARM) || (TARGET == TARGET_ARM64 && NBITS_FIELD == 610)
     unsigned int i, borrow = 0;
@@ -37,7 +37,7 @@ __inline void mp_sub610_p2(const digit_t* a, const digit_t* b, digit_t* c)
 } 
 
 
-__inline void mp_sub610_p4(const digit_t* a, const digit_t* b, digit_t* c)
+inline void mp_sub610_p4(const digit_t* a, const digit_t* b, digit_t* c)
 { // Multiprecision subtraction with correction with 4*p, c = a-b+4p.    
 #if (OS_TARGET == OS_WIN) || defined(GENERIC_IMPLEMENTATION) || (TARGET == TARGET_ARM) || (TARGET == TARGET_ARM64 && NBITS_FIELD == 610)
     unsigned int i, borrow = 0;
@@ -59,7 +59,7 @@ __inline void mp_sub610_p4(const digit_t* a, const digit_t* b, digit_t* c)
 } 
 
 
-__inline void fpadd610(const digit_t* a, const digit_t* b, digit_t* c)
+inline void fpadd610(const digit_t* a, const digit_t* b, digit_t* c)
 { // Modular addition, c = a+b mod p610.
   // Inputs: a, b in [0, 2*p610-1] 
   // Output: c in [0, 2*p610-1] 
@@ -91,7 +91,7 @@ __inline void fpadd610(const digit_t* a, const digit_t* b, digit_t* c)
 } 
 
 
-__inline void fpsub610(const digit_t* a, const digit_t* b, digit_t* c)
+inline void fpsub610(const digit_t* a, const digit_t* b, digit_t* c)
 { // Modular subtraction, c = a-b mod p610.
   // Inputs: a, b in [0, 2*p610-1] 
   // Output: c in [0, 2*p610-1] 
@@ -118,7 +118,7 @@ __inline void fpsub610(const digit_t* a, const digit_t* b, digit_t* c)
 }
 
 
-__inline void fpneg610(digit_t* a)
+inline void fpneg610(digit_t* a)
 { // Modular negation, a = -a mod p610.
   // Input/output: a in [0, 2*p610-1] 
     unsigned int i, borrow = 0;

diff --git a/src/P610/generic/fp_generic.c b/src/P610/generic/fp_generic.c
@@ -15,7 +15,7 @@ extern const uint64_t p610x2[NWORDS64_FIELD];
 extern const uint64_t p610x4[NWORDS64_FIELD];
 
 
-__inline void mp_sub610_p2(const digit_t* a, const digit_t* b, digit_t* c)
+inline void mp_sub610_p2(const digit_t* a, const digit_t* b, digit_t* c)
 { // Multiprecision subtraction with correction with 2*p, c = a-b+2p. 
     unsigned int i, borrow = 0;
 
@@ -30,7 +30,7 @@ __inline void mp_sub610_p2(const digit_t* a, const digit_t* b, digit_t* c)
 } 
 
 
-__inline void mp_sub610_p4(const digit_t* a, const digit_t* b, digit_t* c)
+inline void mp_sub610_p4(const digit_t* a, const digit_t* b, digit_t* c)
 { // Multiprecision subtraction with correction with 4*p, c = a-b+4p.
     unsigned int i, borrow = 0;
 
@@ -45,7 +45,7 @@ __inline void mp_sub610_p4(const digit_t* a, const digit_t* b, digit_t* c)
 } 
 
 
-__inline void fpadd610(const digit_t* a, const digit_t* b, digit_t* c)
+inline void fpadd610(const digit_t* a, const digit_t* b, digit_t* c)
 { // Modular addition, c = a+b mod p610.
   // Inputs: a, b in [0, 2*p610-1] 
   // Output: c in [0, 2*p610-1] 
@@ -69,7 +69,7 @@ __inline void fpadd610(const digit_t* a, const digit_t* b, digit_t* c)
 } 
 
 
-__inline void fpsub610(const digit_t* a, const digit_t* b, digit_t* c)
+inline void fpsub610(const digit_t* a, const digit_t* b, digit_t* c)
 { // Modular subtraction, c = a-b mod p610.
   // Inputs: a, b in [0, 2*p610-1] 
   // Output: c in [0, 2*p610-1] 
@@ -88,7 +88,7 @@ __inline void fpsub610(const digit_t* a, const digit_t* b, digit_t* c)
 }
 
 
-__inline void fpneg610(digit_t* a)
+inline void fpneg610(digit_t* a)
 { // Modular negation, a = -a mod p610.
   // Input/output: a in [0, 2*p610-1] 
     unsigned int i, borrow = 0;

diff --git a/src/P751/AMD64/fp_x64.c b/src/P751/AMD64/fp_x64.c
@@ -15,7 +15,7 @@ extern const uint64_t p751x2[NWORDS_FIELD];
 extern const uint64_t p751x4[NWORDS_FIELD];
 
 
-__inline void mp_sub751_p2(const digit_t* a, const digit_t* b, digit_t* c)
+inline void mp_sub751_p2(const digit_t* a, const digit_t* b, digit_t* c)
 { // Multiprecision subtraction with correction with 2*p, c = a-b+2p.    
 #if (OS_TARGET == OS_WIN) || defined(GENERIC_IMPLEMENTATION) || (TARGET == TARGET_ARM) || (TARGET == TARGET_ARM64 && NBITS_FIELD == 751)
     unsigned int i, borrow = 0;
@@ -37,7 +37,7 @@ __inline void mp_sub751_p2(const digit_t* a, const digit_t* b, digit_t* c)
 } 
 
 
-__inline void mp_sub751_p4(const digit_t* a, const digit_t* b, digit_t* c)
+inline void mp_sub751_p4(const digit_t* a, const digit_t* b, digit_t* c)
 { // Multiprecision subtraction with correction with 4*p, c = a-b+4p.    
 #if (OS_TARGET == OS_WIN) || defined(GENERIC_IMPLEMENTATION) || (TARGET == TARGET_ARM) || (TARGET == TARGET_ARM64 && NBITS_FIELD == 751)
     unsigned int i, borrow = 0;
@@ -59,7 +59,7 @@ __inline void mp_sub751_p4(const digit_t* a, const digit_t* b, digit_t* c)
 }  
 
 
-__inline void fpadd751(const digit_t* a, const digit_t* b, digit_t* c)
+inline void fpadd751(const digit_t* a, const digit_t* b, digit_t* c)
 { // Modular addition, c = a+b mod p751.
   // Inputs: a, b in [0, 2*p751-1] 
   // Output: c in [0, 2*p751-1] 
@@ -91,7 +91,7 @@ __inline void fpadd751(const digit_t* a, const digit_t* b, digit_t* c)
 } 
 
 
-__inline void fpsub751(const digit_t* a, const digit_t* b, digit_t* c)
+inline void fpsub751(const digit_t* a, const digit_t* b, digit_t* c)
 { // Modular subtraction, c = a-b mod p751.
   // Inputs: a, b in [0, 2*p751-1] 
   // Output: c in [0, 2*p751-1] 
@@ -118,7 +118,7 @@ __inline void fpsub751(const digit_t* a, const digit_t* b, digit_t* c)
 }
 
 
-__inline void fpneg751(digit_t* a)
+inline void fpneg751(digit_t* a)
 { // Modular negation, a = -a mod p751.
   // Input/output: a in [0, 2*p751-1] 
     unsigned int i, borrow = 0;

diff --git a/src/P751/generic/fp_generic.c b/src/P751/generic/fp_generic.c
@@ -15,7 +15,7 @@ extern const uint64_t p751x2[NWORDS64_FIELD];
 extern const uint64_t p751x4[NWORDS64_FIELD];
 
 
-__inline void mp_sub751_p2(const digit_t* a, const digit_t* b, digit_t* c)
+inline void mp_sub751_p2(const digit_t* a, const digit_t* b, digit_t* c)
 { // Multiprecision subtraction with correction with 2*p, c = a-b+2p.
     unsigned int i, borrow = 0;
 
@@ -30,7 +30,7 @@ __inline void mp_sub751_p2(const digit_t* a, const digit_t* b, digit_t* c)
 } 
 
 
-__inline void mp_sub751_p4(const digit_t* a, const digit_t* b, digit_t* c)
+inline void mp_sub751_p4(const digit_t* a, const digit_t* b, digit_t* c)
 { // Multiprecision subtraction with correction with 4*p, c = a-b+4p.
     unsigned int i, borrow = 0;
 
@@ -45,7 +45,7 @@ __inline void mp_sub751_p4(const digit_t* a, const digit_t* b, digit_t* c)
 }   
 
 
-__inline void fpadd751(const digit_t* a, const digit_t* b, digit_t* c)
+inline void fpadd751(const digit_t* a, const digit_t* b, digit_t* c)
 { // Modular addition, c = a+b mod p751.
   // Inputs: a, b in [0, 2*p751-1] 
   // Output: c in [0, 2*p751-1] 
@@ -69,7 +69,7 @@ __inline void fpadd751(const digit_t* a, const digit_t* b, digit_t* c)
 } 
 
 
-__inline void fpsub751(const digit_t* a, const digit_t* b, digit_t* c)
+inline void fpsub751(const digit_t* a, const digit_t* b, digit_t* c)
 { // Modular subtraction, c = a-b mod p751.
   // Inputs: a, b in [0, 2*p751-1] 
   // Output: c in [0, 2*p751-1] 
@@ -88,7 +88,7 @@ __inline void fpsub751(const digit_t* a, const digit_t* b, digit_t* c)
 }
 
 
-__inline void fpneg751(digit_t* a)
+inline void fpneg751(digit_t* a)
 { // Modular negation, a = -a mod p751.
   // Input/output: a in [0, 2*p751-1] 
     unsigned int i, borrow = 0;