diff --git a/README.md b/README.md
index 54cadfc..be4dfa0 100644
--- a/README.md
+++ b/README.md
@@ -102,11 +102,11 @@ See instructions below to choose an implementation option and compile on one of
  The following implementation options are available:
 - Portable implementations enabled by setting `OPT_LEVEL=GENERIC`. 
 - Optimized x64 assembly implementations for Linux\Mac OS X enabled by setting `ARCH=x64` and `OPT_LEVEL=FAST`.
-- Optimized ARMv8 assembly implementation for Linux enabled by setting `ARCH=ARM64` and `OPT_LEVEL=FAST`.
+- Optimized ARMv8 assembly implementation for Linux\Mac OS X enabled by setting `ARCH=ARM64` (or `ARCH=M1` for Apple M1 SoC) and `OPT_LEVEL=FAST`.
 
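-Follow the instructions in the sections "_Instructions for Linux_" or "_Instructions for Windows_" below to configure these different implementation options.
+Follow the instructions in the sections "_Instructions for Linux\Mac OS X_" or "_Instructions for Windows_" below to configure these different implementation options.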
 
-## Instructions for Linux
+## Instructions for Linux\Mac OS X
 
 By simply executing:
 
@@ -130,16 +130,16 @@ is supported on Haswell, and both MULX and ADX are supported on Broadwell, Skyla
 Note that USE_ADX can only be set to `TRUE` if `USE_MULX=TRUE`.
 The option `USE_MULX=FALSE` with `USE_ADX=FALSE` is only supported on p503 and p751.
 
-Options for x86/ARM/s390x:
+Options for x86/ARM/M1/s390x:
 
 ```sh
-$ make ARCH=[x86/ARM/s390x] CC=[gcc/clang]
+$ make ARCH=[x86/ARM/M1/s390x] CC=[gcc/clang]
 ```
 
-Options for ARM64:
+Options for ARM64 or Apple M1:
 
 ```sh
-$ make ARCH=[ARM64] CC=[gcc/clang] OPT_LEVEL=[FAST/GENERIC]
+$ make ARCH=[ARM64/M1] CC=[gcc/clang] OPT_LEVEL=[FAST/GENERIC]
 ```
 
 As in the x64 case, `OPT_LEVEL=FAST` enables the use of assembly optimizations on ARMv8 platforms.
diff --git a/src/P434/AMD64/fp_x64.c b/src/P434/AMD64/fp_x64.c
index 32c0f7c..5cb92a7 100644
--- a/src/P434/AMD64/fp_x64.c
+++ b/src/P434/AMD64/fp_x64.c
@@ -15,7 +15,7 @@ extern const uint64_t p434x2[NWORDS_FIELD];
 extern const uint64_t p434x4[NWORDS_FIELD];
 
 
-__inline void mp_sub434_p2(const digit_t* a, const digit_t* b, digit_t* c)
+inline void mp_sub434_p2(const digit_t* a, const digit_t* b, digit_t* c)
 { // Multiprecision subtraction with correction with 2*p, c = a-b+2p.    
 #if (OS_TARGET == OS_WIN) || defined(GENERIC_IMPLEMENTATION) || (TARGET == TARGET_ARM) || (TARGET == TARGET_ARM64 && NBITS_FIELD == 610)
     unsigned int i, borrow = 0;
@@ -37,7 +37,7 @@ __inline void mp_sub434_p2(const digit_t* a, const digit_t* b, digit_t* c)
 } 
 
 
-__inline void mp_sub434_p4(const digit_t* a, const digit_t* b, digit_t* c)
+inline void mp_sub434_p4(const digit_t* a, const digit_t* b, digit_t* c)
 { // Multiprecision subtraction with correction with 4*p, c = a-b+4p.    
 #if (OS_TARGET == OS_WIN) || defined(GENERIC_IMPLEMENTATION) || (TARGET == TARGET_ARM) || (TARGET == TARGET_ARM64 && NBITS_FIELD == 610)
     unsigned int i, borrow = 0;
@@ -59,7 +59,7 @@ __inline void mp_sub434_p4(const digit_t* a, const digit_t* b, digit_t* c)
 }
 
 
-__inline void fpadd434(const digit_t* a, const digit_t* b, digit_t* c)
+inline void fpadd434(const digit_t* a, const digit_t* b, digit_t* c)
 { // Modular addition, c = a+b mod p434.
   // Inputs: a, b in [0, 2*p434-1] 
   // Output: c in [0, 2*p434-1] 
@@ -91,7 +91,7 @@ __inline void fpadd434(const digit_t* a, const digit_t* b, digit_t* c)
 } 
 
 
-__inline void fpsub434(const digit_t* a, const digit_t* b, digit_t* c)
+inline void fpsub434(const digit_t* a, const digit_t* b, digit_t* c)
 { // Modular subtraction, c = a-b mod p434.
   // Inputs: a, b in [0, 2*p434-1] 
   // Output: c in [0, 2*p434-1] 
@@ -118,7 +118,7 @@ __inline void fpsub434(const digit_t* a, const digit_t* b, digit_t* c)
 }
 
 
-__inline void fpneg434(digit_t* a)
+inline void fpneg434(digit_t* a)
 { // Modular negation, a = -a mod p434.
   // Input/output: a in [0, 2*p434-1] 
     unsigned int i, borrow = 0;
diff --git a/src/P434/generic/fp_generic.c b/src/P434/generic/fp_generic.c
index 0179b7f..83856b9 100755
--- a/src/P434/generic/fp_generic.c
+++ b/src/P434/generic/fp_generic.c
@@ -15,7 +15,7 @@ extern const uint64_t p434x2[NWORDS64_FIELD];
 extern const uint64_t p434x4[NWORDS64_FIELD];
 
 
-__inline void mp_sub434_p2(const digit_t* a, const digit_t* b, digit_t* c)
+inline void mp_sub434_p2(const digit_t* a, const digit_t* b, digit_t* c)
 { // Multiprecision subtraction with correction with 2*p, c = a-b+2p. 
     unsigned int i, borrow = 0;
 
@@ -30,7 +30,7 @@ __inline void mp_sub434_p2(const digit_t* a, const digit_t* b, digit_t* c)
 } 
 
 
-__inline void mp_sub434_p4(const digit_t* a, const digit_t* b, digit_t* c)
+inline void mp_sub434_p4(const digit_t* a, const digit_t* b, digit_t* c)
 { // Multiprecision subtraction with correction with 4*p, c = a-b+4p. 
     unsigned int i, borrow = 0;
 
@@ -45,7 +45,7 @@ __inline void mp_sub434_p4(const digit_t* a, const digit_t* b, digit_t* c)
 } 
 
 
-__inline void fpadd434(const digit_t* a, const digit_t* b, digit_t* c)
+inline void fpadd434(const digit_t* a, const digit_t* b, digit_t* c)
 { // Modular addition, c = a+b mod p434.
   // Inputs: a, b in [0, 2*p434-1] 
   // Output: c in [0, 2*p434-1] 
@@ -69,7 +69,7 @@ __inline void fpadd434(const digit_t* a, const digit_t* b, digit_t* c)
 } 
 
 
-__inline void fpsub434(const digit_t* a, const digit_t* b, digit_t* c)
+inline void fpsub434(const digit_t* a, const digit_t* b, digit_t* c)
 { // Modular subtraction, c = a-b mod p434.
   // Inputs: a, b in [0, 2*p434-1] 
   // Output: c in [0, 2*p434-1] 
@@ -88,7 +88,7 @@ __inline void fpsub434(const digit_t* a, const digit_t* b, digit_t* c)
 }
 
 
-__inline void fpneg434(digit_t* a)
+inline void fpneg434(digit_t* a)
 { // Modular negation, a = -a mod p434.
   // Input/output: a in [0, 2*p434-1] 
     unsigned int i, borrow = 0;
diff --git a/src/P503/AMD64/fp_x64.c b/src/P503/AMD64/fp_x64.c
index 11e58c1..ca3c6f2 100644
--- a/src/P503/AMD64/fp_x64.c
+++ b/src/P503/AMD64/fp_x64.c
@@ -15,7 +15,7 @@ extern const uint64_t p503x2[NWORDS_FIELD];
 extern const uint64_t p503x4[NWORDS_FIELD];
 
 
-__inline void mp_sub503_p2(const digit_t* a, const digit_t* b, digit_t* c)
+inline void mp_sub503_p2(const digit_t* a, const digit_t* b, digit_t* c)
 { // Multiprecision subtraction with correction with 2*p, c = a-b+2p.    
 #if (OS_TARGET == OS_WIN) || defined(GENERIC_IMPLEMENTATION) || (TARGET == TARGET_ARM) || (TARGET == TARGET_ARM64 && NBITS_FIELD == 610)
     unsigned int i, borrow = 0;
@@ -37,7 +37,7 @@ __inline void mp_sub503_p2(const digit_t* a, const digit_t* b, digit_t* c)
 } 
 
 
-__inline void mp_sub503_p4(const digit_t* a, const digit_t* b, digit_t* c)
+inline void mp_sub503_p4(const digit_t* a, const digit_t* b, digit_t* c)
 { // Multiprecision subtraction with correction with 4*p, c = a-b+4p.    
 #if (OS_TARGET == OS_WIN) || defined(GENERIC_IMPLEMENTATION) || (TARGET == TARGET_ARM) || (TARGET == TARGET_ARM64 && NBITS_FIELD == 610)
     unsigned int i, borrow = 0;
@@ -59,7 +59,7 @@ __inline void mp_sub503_p4(const digit_t* a, const digit_t* b, digit_t* c)
 } 
 
 
-__inline void fpadd503(const digit_t* a, const digit_t* b, digit_t* c)
+inline void fpadd503(const digit_t* a, const digit_t* b, digit_t* c)
 { // Modular addition, c = a+b mod p503.
   // Inputs: a, b in [0, 2*p503-1] 
   // Output: c in [0, 2*p503-1] 
@@ -91,7 +91,7 @@ __inline void fpadd503(const digit_t* a, const digit_t* b, digit_t* c)
 } 
 
 
-__inline void fpsub503(const digit_t* a, const digit_t* b, digit_t* c)
+inline void fpsub503(const digit_t* a, const digit_t* b, digit_t* c)
 { // Modular subtraction, c = a-b mod p503.
   // Inputs: a, b in [0, 2*p503-1] 
   // Output: c in [0, 2*p503-1] 
@@ -118,7 +118,7 @@ __inline void fpsub503(const digit_t* a, const digit_t* b, digit_t* c)
 }
 
 
-__inline void fpneg503(digit_t* a)
+inline void fpneg503(digit_t* a)
 { // Modular negation, a = -a mod p503.
   // Input/output: a in [0, 2*p503-1] 
     unsigned int i, borrow = 0;
diff --git a/src/P503/generic/fp_generic.c b/src/P503/generic/fp_generic.c
index 85f1a6f..87d8b09 100755
--- a/src/P503/generic/fp_generic.c
+++ b/src/P503/generic/fp_generic.c
@@ -15,7 +15,7 @@ extern const uint64_t p503x2[NWORDS64_FIELD];
 extern const uint64_t p503x4[NWORDS64_FIELD];
 
 
-__inline void mp_sub503_p2(const digit_t* a, const digit_t* b, digit_t* c)
+inline void mp_sub503_p2(const digit_t* a, const digit_t* b, digit_t* c)
 { // Multiprecision subtraction with correction with 2*p, c = a-b+2p.
     unsigned int i, borrow = 0;
 
@@ -30,7 +30,7 @@ __inline void mp_sub503_p2(const digit_t* a, const digit_t* b, digit_t* c)
 } 
 
 
-__inline void mp_sub503_p4(const digit_t* a, const digit_t* b, digit_t* c)
+inline void mp_sub503_p4(const digit_t* a, const digit_t* b, digit_t* c)
 { // Multiprecision subtraction with correction with 4*p, c = a-b+4p.
     unsigned int i, borrow = 0;
 
@@ -45,7 +45,7 @@ __inline void mp_sub503_p4(const digit_t* a, const digit_t* b, digit_t* c)
 } 
 
 
-__inline void fpadd503(const digit_t* a, const digit_t* b, digit_t* c)
+inline void fpadd503(const digit_t* a, const digit_t* b, digit_t* c)
 { // Modular addition, c = a+b mod p503.
   // Inputs: a, b in [0, 2*p503-1] 
   // Output: c in [0, 2*p503-1] 
@@ -69,7 +69,7 @@ __inline void fpadd503(const digit_t* a, const digit_t* b, digit_t* c)
 } 
 
 
-__inline void fpsub503(const digit_t* a, const digit_t* b, digit_t* c)
+inline void fpsub503(const digit_t* a, const digit_t* b, digit_t* c)
 { // Modular subtraction, c = a-b mod p503.
   // Inputs: a, b in [0, 2*p503-1] 
   // Output: c in [0, 2*p503-1] 
@@ -88,7 +88,7 @@ __inline void fpsub503(const digit_t* a, const digit_t* b, digit_t* c)
 }
 
 
-__inline void fpneg503(digit_t* a)
+inline void fpneg503(digit_t* a)
 { // Modular negation, a = -a mod p503.
   // Input/output: a in [0, 2*p503-1] 
     unsigned int i, borrow = 0;
diff --git a/src/P610/AMD64/fp_x64.c b/src/P610/AMD64/fp_x64.c
index da2a21c..e77022e 100644
--- a/src/P610/AMD64/fp_x64.c
+++ b/src/P610/AMD64/fp_x64.c
@@ -15,7 +15,7 @@ extern const uint64_t p610x2[NWORDS_FIELD];
 extern const uint64_t p610x4[NWORDS_FIELD];
 
 
-__inline void mp_sub610_p2(const digit_t* a, const digit_t* b, digit_t* c)
+inline void mp_sub610_p2(const digit_t* a, const digit_t* b, digit_t* c)
 { // Multiprecision subtraction with correction with 2*p, c = a-b+2p.    
 #if (OS_TARGET == OS_WIN) || defined(GENERIC_IMPLEMENTATION) || (TARGET == TARGET_ARM) || (TARGET == TARGET_ARM64 && NBITS_FIELD == 610)
     unsigned int i, borrow = 0;
@@ -37,7 +37,7 @@ __inline void mp_sub610_p2(const digit_t* a, const digit_t* b, digit_t* c)
 } 
 
 
-__inline void mp_sub610_p4(const digit_t* a, const digit_t* b, digit_t* c)
+inline void mp_sub610_p4(const digit_t* a, const digit_t* b, digit_t* c)
 { // Multiprecision subtraction with correction with 4*p, c = a-b+4p.    
 #if (OS_TARGET == OS_WIN) || defined(GENERIC_IMPLEMENTATION) || (TARGET == TARGET_ARM) || (TARGET == TARGET_ARM64 && NBITS_FIELD == 610)
     unsigned int i, borrow = 0;
@@ -59,7 +59,7 @@ __inline void mp_sub610_p4(const digit_t* a, const digit_t* b, digit_t* c)
 } 
 
 
-__inline void fpadd610(const digit_t* a, const digit_t* b, digit_t* c)
+inline void fpadd610(const digit_t* a, const digit_t* b, digit_t* c)
 { // Modular addition, c = a+b mod p610.
   // Inputs: a, b in [0, 2*p610-1] 
   // Output: c in [0, 2*p610-1] 
@@ -91,7 +91,7 @@ __inline void fpadd610(const digit_t* a, const digit_t* b, digit_t* c)
 } 
 
 
-__inline void fpsub610(const digit_t* a, const digit_t* b, digit_t* c)
+inline void fpsub610(const digit_t* a, const digit_t* b, digit_t* c)
 { // Modular subtraction, c = a-b mod p610.
   // Inputs: a, b in [0, 2*p610-1] 
   // Output: c in [0, 2*p610-1] 
@@ -118,7 +118,7 @@ __inline void fpsub610(const digit_t* a, const digit_t* b, digit_t* c)
 }
 
 
-__inline void fpneg610(digit_t* a)
+inline void fpneg610(digit_t* a)
 { // Modular negation, a = -a mod p610.
   // Input/output: a in [0, 2*p610-1] 
     unsigned int i, borrow = 0;
diff --git a/src/P610/generic/fp_generic.c b/src/P610/generic/fp_generic.c
index aa06179..e56a343 100755
--- a/src/P610/generic/fp_generic.c
+++ b/src/P610/generic/fp_generic.c
@@ -15,7 +15,7 @@ extern const uint64_t p610x2[NWORDS64_FIELD];
 extern const uint64_t p610x4[NWORDS64_FIELD];
 
 
-__inline void mp_sub610_p2(const digit_t* a, const digit_t* b, digit_t* c)
+inline void mp_sub610_p2(const digit_t* a, const digit_t* b, digit_t* c)
 { // Multiprecision subtraction with correction with 2*p, c = a-b+2p. 
     unsigned int i, borrow = 0;
 
@@ -30,7 +30,7 @@ __inline void mp_sub610_p2(const digit_t* a, const digit_t* b, digit_t* c)
 } 
 
 
-__inline void mp_sub610_p4(const digit_t* a, const digit_t* b, digit_t* c)
+inline void mp_sub610_p4(const digit_t* a, const digit_t* b, digit_t* c)
 { // Multiprecision subtraction with correction with 4*p, c = a-b+4p.
     unsigned int i, borrow = 0;
 
@@ -45,7 +45,7 @@ __inline void mp_sub610_p4(const digit_t* a, const digit_t* b, digit_t* c)
 } 
 
 
-__inline void fpadd610(const digit_t* a, const digit_t* b, digit_t* c)
+inline void fpadd610(const digit_t* a, const digit_t* b, digit_t* c)
 { // Modular addition, c = a+b mod p610.
   // Inputs: a, b in [0, 2*p610-1] 
   // Output: c in [0, 2*p610-1] 
@@ -69,7 +69,7 @@ __inline void fpadd610(const digit_t* a, const digit_t* b, digit_t* c)
 } 
 
 
-__inline void fpsub610(const digit_t* a, const digit_t* b, digit_t* c)
+inline void fpsub610(const digit_t* a, const digit_t* b, digit_t* c)
 { // Modular subtraction, c = a-b mod p610.
   // Inputs: a, b in [0, 2*p610-1] 
   // Output: c in [0, 2*p610-1] 
@@ -88,7 +88,7 @@ __inline void fpsub610(const digit_t* a, const digit_t* b, digit_t* c)
 }
 
 
-__inline void fpneg610(digit_t* a)
+inline void fpneg610(digit_t* a)
 { // Modular negation, a = -a mod p610.
   // Input/output: a in [0, 2*p610-1] 
     unsigned int i, borrow = 0;
diff --git a/src/P751/AMD64/fp_x64.c b/src/P751/AMD64/fp_x64.c
index a261811..d9e47fa 100644
--- a/src/P751/AMD64/fp_x64.c
+++ b/src/P751/AMD64/fp_x64.c
@@ -15,7 +15,7 @@ extern const uint64_t p751x2[NWORDS_FIELD];
 extern const uint64_t p751x4[NWORDS_FIELD];
 
 
-__inline void mp_sub751_p2(const digit_t* a, const digit_t* b, digit_t* c)
+inline void mp_sub751_p2(const digit_t* a, const digit_t* b, digit_t* c)
 { // Multiprecision subtraction with correction with 2*p, c = a-b+2p.    
 #if (OS_TARGET == OS_WIN) || defined(GENERIC_IMPLEMENTATION) || (TARGET == TARGET_ARM) || (TARGET == TARGET_ARM64 && NBITS_FIELD == 751)
     unsigned int i, borrow = 0;
@@ -37,7 +37,7 @@ __inline void mp_sub751_p2(const digit_t* a, const digit_t* b, digit_t* c)
 } 
 
 
-__inline void mp_sub751_p4(const digit_t* a, const digit_t* b, digit_t* c)
+inline void mp_sub751_p4(const digit_t* a, const digit_t* b, digit_t* c)
 { // Multiprecision subtraction with correction with 4*p, c = a-b+4p.    
 #if (OS_TARGET == OS_WIN) || defined(GENERIC_IMPLEMENTATION) || (TARGET == TARGET_ARM) || (TARGET == TARGET_ARM64 && NBITS_FIELD == 751)
     unsigned int i, borrow = 0;
@@ -59,7 +59,7 @@ __inline void mp_sub751_p4(const digit_t* a, const digit_t* b, digit_t* c)
 }  
 
 
-__inline void fpadd751(const digit_t* a, const digit_t* b, digit_t* c)
+inline void fpadd751(const digit_t* a, const digit_t* b, digit_t* c)
 { // Modular addition, c = a+b mod p751.
   // Inputs: a, b in [0, 2*p751-1] 
   // Output: c in [0, 2*p751-1] 
@@ -91,7 +91,7 @@ __inline void fpadd751(const digit_t* a, const digit_t* b, digit_t* c)
 } 
 
 
-__inline void fpsub751(const digit_t* a, const digit_t* b, digit_t* c)
+inline void fpsub751(const digit_t* a, const digit_t* b, digit_t* c)
 { // Modular subtraction, c = a-b mod p751.
   // Inputs: a, b in [0, 2*p751-1] 
   // Output: c in [0, 2*p751-1] 
@@ -118,7 +118,7 @@ __inline void fpsub751(const digit_t* a, const digit_t* b, digit_t* c)
 }
 
 
-__inline void fpneg751(digit_t* a)
+inline void fpneg751(digit_t* a)
 { // Modular negation, a = -a mod p751.
   // Input/output: a in [0, 2*p751-1] 
     unsigned int i, borrow = 0;
diff --git a/src/P751/generic/fp_generic.c b/src/P751/generic/fp_generic.c
index be13566..d07750e 100755
--- a/src/P751/generic/fp_generic.c
+++ b/src/P751/generic/fp_generic.c
@@ -15,7 +15,7 @@ extern const uint64_t p751x2[NWORDS64_FIELD];
 extern const uint64_t p751x4[NWORDS64_FIELD];
 
 
-__inline void mp_sub751_p2(const digit_t* a, const digit_t* b, digit_t* c)
+inline void mp_sub751_p2(const digit_t* a, const digit_t* b, digit_t* c)
 { // Multiprecision subtraction with correction with 2*p, c = a-b+2p.
     unsigned int i, borrow = 0;
 
@@ -30,7 +30,7 @@ __inline void mp_sub751_p2(const digit_t* a, const digit_t* b, digit_t* c)
 } 
 
 
-__inline void mp_sub751_p4(const digit_t* a, const digit_t* b, digit_t* c)
+inline void mp_sub751_p4(const digit_t* a, const digit_t* b, digit_t* c)
 { // Multiprecision subtraction with correction with 4*p, c = a-b+4p.
     unsigned int i, borrow = 0;
 
@@ -45,7 +45,7 @@ __inline void mp_sub751_p4(const digit_t* a, const digit_t* b, digit_t* c)
 }   
 
 
-__inline void fpadd751(const digit_t* a, const digit_t* b, digit_t* c)
+inline void fpadd751(const digit_t* a, const digit_t* b, digit_t* c)
 { // Modular addition, c = a+b mod p751.
   // Inputs: a, b in [0, 2*p751-1] 
   // Output: c in [0, 2*p751-1] 
@@ -69,7 +69,7 @@ __inline void fpadd751(const digit_t* a, const digit_t* b, digit_t* c)
 } 
 
 
-__inline void fpsub751(const digit_t* a, const digit_t* b, digit_t* c)
+inline void fpsub751(const digit_t* a, const digit_t* b, digit_t* c)
 { // Modular subtraction, c = a-b mod p751.
   // Inputs: a, b in [0, 2*p751-1] 
   // Output: c in [0, 2*p751-1] 
@@ -88,7 +88,7 @@ __inline void fpsub751(const digit_t* a, const digit_t* b, digit_t* c)
 }
 
 
-__inline void fpneg751(digit_t* a)
+inline void fpneg751(digit_t* a)
 { // Modular negation, a = -a mod p751.
   // Input/output: a in [0, 2*p751-1] 
     unsigned int i, borrow = 0;
diff --git a/src/config.h b/src/config.h
index 69a066c..58a5121 100644
--- a/src/config.h
+++ b/src/config.h
@@ -157,17 +157,17 @@
 
 // The following functions return 1 (TRUE) if condition is true, 0 (FALSE) otherwise
 
-static __inline unsigned int is_digit_nonzero_ct(digit_t x)
+static inline unsigned int is_digit_nonzero_ct(digit_t x)
 { // Is x != 0?
     return (unsigned int)((x | (0-x)) >> (RADIX-1));
 }
 
-static __inline unsigned int is_digit_zero_ct(digit_t x)
+static inline unsigned int is_digit_zero_ct(digit_t x)
 { // Is x = 0?
     return (unsigned int)(1 ^ is_digit_nonzero_ct(x));
 }
 
-static __inline unsigned int is_digit_lessthan_ct(digit_t x, digit_t y)
+static inline unsigned int is_digit_lessthan_ct(digit_t x, digit_t y)
 { // Is x < y?
     return (unsigned int)((x ^ ((x ^ y) | ((x - y) ^ y))) >> (RADIX-1)); 
 }
diff --git a/src/fpx.c b/src/fpx.c
index 2d509d5..6eadbd0 100644
--- a/src/fpx.c
+++ b/src/fpx.c
@@ -38,7 +38,7 @@ void ct_cmov(uint8_t *r, const uint8_t *a, unsigned int len, int8_t selector)
 }
 
 
-__inline static void encode_to_bytes(const digit_t* x, unsigned char* enc, int nbytes)
+inline static void encode_to_bytes(const digit_t* x, unsigned char* enc, int nbytes)
 { // Encoding digits to bytes according to endianness
 #ifdef _BIG_ENDIAN_
     int ndigits = nbytes / sizeof(digit_t);
@@ -56,7 +56,7 @@ __inline static void encode_to_bytes(const digit_t* x, unsigned char* enc, int n
 }
 
 
-__inline static void decode_to_digits(const unsigned char* x, digit_t* dec, int nbytes, int ndigits)
+inline static void decode_to_digits(const unsigned char* x, digit_t* dec, int nbytes, int ndigits)
 { // Decoding bytes to digits according to endianness
 
     dec[ndigits - 1] = 0;
@@ -87,7 +87,7 @@ static void fp2_decode(const unsigned char *x, f2elm_t dec)
 }
 
 
-__inline void fpcopy(const digit_t* a, digit_t* c)
+inline void fpcopy(const digit_t* a, digit_t* c)
 { // Copy a field element, c = a.
     unsigned int i;
 
@@ -96,7 +96,7 @@ __inline void fpcopy(const digit_t* a, digit_t* c)
 }
 
 
-__inline void fpzero(digit_t* a)
+inline void fpzero(digit_t* a)
 { // Zero a field element, a = 0.
     unsigned int i;
 
@@ -185,14 +185,14 @@ void fp2neg(f2elm_t a)
 }
 
 
-__inline void fp2add(const f2elm_t a, const f2elm_t b, f2elm_t c)           
+inline void fp2add(const f2elm_t a, const f2elm_t b, f2elm_t c)           
 { // GF(p^2) addition, c = a+b in GF(p^2).
     fpadd(a[0], b[0], c[0]);
     fpadd(a[1], b[1], c[1]);
 }
 
 
-__inline void fp2sub(const f2elm_t a, const f2elm_t b, f2elm_t c)          
+inline void fp2sub(const f2elm_t a, const f2elm_t b, f2elm_t c)          
 { // GF(p^2) subtraction, c = a-b in GF(p^2).
     fpsub(a[0], b[0], c[0]);
     fpsub(a[1], b[1], c[1]);
@@ -213,7 +213,7 @@ void fp2correction(f2elm_t a)
 }
 
 
-__inline static void mp_addfast(const digit_t* a, const digit_t* b, digit_t* c)
+inline static void mp_addfast(const digit_t* a, const digit_t* b, digit_t* c)
 { // Multiprecision addition, c = a+b.    
 #if (OS_TARGET == OS_WIN) || defined(GENERIC_IMPLEMENTATION) || (TARGET == TARGET_ARM)
 
@@ -227,21 +227,21 @@ __inline static void mp_addfast(const digit_t* a, const digit_t* b, digit_t* c)
 }
 
 
-__inline static void mp2_add(const f2elm_t a, const f2elm_t b, f2elm_t c)       
+inline static void mp2_add(const f2elm_t a, const f2elm_t b, f2elm_t c)       
 { // GF(p^2) addition without correction, c = a+b in GF(p^2). 
     mp_addfast(a[0], b[0], c[0]);
     mp_addfast(a[1], b[1], c[1]);
 }
 
 
-__inline static void mp2_sub_p2(const f2elm_t a, const f2elm_t b, f2elm_t c)       
+inline static void mp2_sub_p2(const f2elm_t a, const f2elm_t b, f2elm_t c)       
 { // GF(p^2) subtraction with correction with 2*p, c = a-b+2p in GF(p^2).    
     mp_sub_p2(a[0], b[0], c[0]);  
     mp_sub_p2(a[1], b[1], c[1]);
 }
 
 
-__inline unsigned int mp_add(const digit_t* a, const digit_t* b, digit_t* c, const unsigned int nwords)
+inline unsigned int mp_add(const digit_t* a, const digit_t* b, digit_t* c, const unsigned int nwords)
 { // Multiprecision addition, c = a+b, where lng(a) = lng(b) = nwords. Returns the carry bit.
     unsigned int i, carry = 0;
         
@@ -267,7 +267,7 @@ void fp2sqr_mont(const f2elm_t a, f2elm_t c)
 }
 
 
-__inline unsigned int mp_sub(const digit_t* a, const digit_t* b, digit_t* c, const unsigned int nwords)
+inline unsigned int mp_sub(const digit_t* a, const digit_t* b, digit_t* c, const unsigned int nwords)
 { // Multiprecision subtraction, c = a-b, where lng(a) = lng(b) = nwords. Returns the borrow bit.
     unsigned int i, borrow = 0;
 
@@ -278,7 +278,7 @@ __inline unsigned int mp_sub(const digit_t* a, const digit_t* b, digit_t* c, con
 }
 
 
-__inline static void mp_subaddfast(const digit_t* a, const digit_t* b, digit_t* c)
+inline static void mp_subaddfast(const digit_t* a, const digit_t* b, digit_t* c)
 { // Multiprecision subtraction followed by addition with p*2^MAXBITS_FIELD, c = a-b+(p*2^MAXBITS_FIELD) if a-b < 0, otherwise c=a-b. 
 #if (OS_TARGET == OS_WIN) || defined(GENERIC_IMPLEMENTATION) || (TARGET == TARGET_ARM)
     felm_t t1;
@@ -296,7 +296,7 @@ __inline static void mp_subaddfast(const digit_t* a, const digit_t* b, digit_t*
 }
 
 
-__inline static void mp_dblsubfast(const digit_t* a, const digit_t* b, digit_t* c)
+inline static void mp_dblsubfast(const digit_t* a, const digit_t* b, digit_t* c)
 { // Multiprecision subtraction, c = c-a-b, where lng(a) = lng(b) = 2*NWORDS_FIELD.
 #if (OS_TARGET == OS_WIN) || defined(GENERIC_IMPLEMENTATION) || (TARGET == TARGET_ARM)
 
@@ -823,7 +823,7 @@ void mp_shiftl1(digit_t* x, const unsigned int nwords)
 
 #ifdef COMPRESS
 
-static __inline unsigned int is_felm_zero(const felm_t x)
+static inline unsigned int is_felm_zero(const felm_t x)
 { // Is x = 0? return 1 (TRUE) if condition is true, 0 (FALSE) otherwise.
   // SECURITY NOTE: This function does not run in constant-time.
     unsigned int i;
@@ -834,7 +834,7 @@ static __inline unsigned int is_felm_zero(const felm_t x)
     return 1;
 }
 
-static __inline unsigned int is_felm_one(const felm_t x)
+static inline unsigned int is_felm_one(const felm_t x)
-{ // Is x = 0? return 1 (TRUE) if condition is true, 0 (FALSE) otherwise.
+{ // Is x = 1? return 1 (TRUE) if condition is true, 0 (FALSE) otherwise.
   // SECURITY NOTE: This function does not run in constant-time.
     unsigned int i;
@@ -1007,7 +1007,7 @@ void sqrt_Fp2(const f2elm_t u, f2elm_t y)
 }
 
 
-static __inline void power2_setup(digit_t* x, int mark, const unsigned int nwords)
+static inline void power2_setup(digit_t* x, int mark, const unsigned int nwords)
 { // Set up the value 2^mark.
     unsigned int i;
 
@@ -1042,13 +1042,13 @@ int8_t cmp_f2elm(const f2elm_t x, const f2elm_t y)
 }
 
 
-static __inline unsigned int is_felm_even(const felm_t x)
+static inline unsigned int is_felm_even(const felm_t x)
 { // Is x even? return 1 (TRUE) if condition is true, 0 (FALSE) otherwise.
     return (unsigned int)((x[0] & 1) ^ 1);
 }
 
 
-static __inline unsigned int is_felm_lt(const felm_t x, const felm_t y)
+static inline unsigned int is_felm_lt(const felm_t x, const felm_t y)
 { // Is x < y? return 1 (TRUE) if condition is true, 0 (FALSE) otherwise.
   // SECURITY NOTE: This function does not run in constant-time.
 
@@ -1063,7 +1063,7 @@ static __inline unsigned int is_felm_lt(const felm_t x, const felm_t y)
 }
 
 
-static __inline unsigned int is_orderelm_lt(const digit_t *x, const digit_t *y)
+static inline unsigned int is_orderelm_lt(const digit_t *x, const digit_t *y)
 { // Is x < y? return 1 (TRUE) if condition is true, 0 (FALSE) otherwise.
   // SECURITY NOTE: This function does not run in constant-time.
 
@@ -1078,7 +1078,7 @@ static __inline unsigned int is_orderelm_lt(const digit_t *x, const digit_t *y)
 }
 
 
-static __inline void fpinv_mont_bingcd_partial(const felm_t a, felm_t x1, unsigned int* k)
+static inline void fpinv_mont_bingcd_partial(const felm_t a, felm_t x1, unsigned int* k)
 { // Partial Montgomery inversion via the binary GCD algorithm.
     felm_t u, v, x2;
     unsigned int cwords;  // Number of words necessary for x1, x2
@@ -1285,7 +1285,7 @@ void from_Montgomery_mod_order(const digit_t* ma, digit_t* c, const digit_t* ord
 }
 
 
-static __inline unsigned int is_zero_mod_order(const digit_t* x)
+static inline unsigned int is_zero_mod_order(const digit_t* x)
 { // Is x = 0? return 1 (TRUE) if condition is true, 0 (FALSE) otherwise
   // SECURITY NOTE: This function does not run in constant time.
     unsigned int i;
@@ -1297,13 +1297,13 @@ static __inline unsigned int is_zero_mod_order(const digit_t* x)
 }
 
 
-static __inline unsigned int is_even_mod_order(const digit_t* x)
+static inline unsigned int is_even_mod_order(const digit_t* x)
 { // Is x even? return 1 (TRUE) if condition is true, 0 (FALSE) otherwise.
     return (unsigned int)((x[0] & 1) ^ 1);
 }
 
 
-static __inline unsigned int is_lt_mod_order(const digit_t* x, const digit_t* y)
+static inline unsigned int is_lt_mod_order(const digit_t* x, const digit_t* y)
 { // Is x < y? return 1 (TRUE) if condition is true, 0 (FALSE) otherwise.
   // SECURITY NOTE: This function does not run in constant time.
     int i;
@@ -1319,7 +1319,7 @@ static __inline unsigned int is_lt_mod_order(const digit_t* x, const digit_t* y)
 }
 
 
-static __inline void Montgomery_inversion_mod_order_bingcd_partial(const digit_t* a, digit_t* x1, unsigned int* k, const digit_t* order)
+static inline void Montgomery_inversion_mod_order_bingcd_partial(const digit_t* a, digit_t* x1, unsigned int* k, const digit_t* order)
 { // Partial Montgomery inversion modulo order.
     digit_t u[NWORDS_ORDER], v[NWORDS_ORDER], x2[NWORDS_ORDER] = {0};
     unsigned int cwords;  // number of words necessary for x1, x2
diff --git a/src/random/random.c b/src/random/random.c
index 04a7fdf..028acbe 100644
--- a/src/random/random.c
+++ b/src/random/random.c
@@ -19,7 +19,7 @@
 #define failed 1
 
 
-static __inline void delay(unsigned int count)
+static inline void delay(unsigned int count)
 {
     while (count--) {}
 }