diff --git a/Makefile.arm64 b/Makefile.arm64 index a4f8bab6b9..b5170163f7 100644 --- a/Makefile.arm64 +++ b/Makefile.arm64 @@ -4,4 +4,8 @@ CCOMMON_OPT += -march=armv8-a FCOMMON_OPT += -march=armv8-a endif +ifeq ($(CORE), CORTEXA57) +CCOMMON_OPT += -march=armv8-a+crc+crypto+fp+simd -mtune=cortex-a57 +FCOMMON_OPT += -march=armv8-a+crc+crypto+fp+simd -mtune=cortex-a57 +endif diff --git a/TargetList.txt b/TargetList.txt index b2878ba323..dc1e08722e 100644 --- a/TargetList.txt +++ b/TargetList.txt @@ -74,3 +74,5 @@ ARMV5 7.ARM 64-bit CPU: ARMV8 +CORTEXA57 + diff --git a/common_arm64.h b/common_arm64.h index 15987c677a..ed5adbc593 100644 --- a/common_arm64.h +++ b/common_arm64.h @@ -89,8 +89,10 @@ static inline int blas_quickdivide(blasint x, blasint y){ #if defined(ASSEMBLER) && !defined(NEEDPARAM) #define PROLOGUE \ + .text ;\ + .align 4 ;\ .global REALNAME ;\ - .func REALNAME ;\ + .type REALNAME, %function ;\ REALNAME: #define EPILOGUE @@ -107,7 +109,11 @@ static inline int blas_quickdivide(blasint x, blasint y){ #endif #define HUGE_PAGESIZE ( 4 << 20) +#if defined(CORTEXA57) +#define BUFFER_SIZE (128 << 20) +#else #define BUFFER_SIZE (16 << 20) +#endif #define BASE_ADDRESS (START_ADDRESS - BUFFER_SIZE * MAX_CPU_NUMBER) diff --git a/cpuid_arm64.c b/cpuid_arm64.c index c7a27f8913..a5a0b5e0a0 100644 --- a/cpuid_arm64.c +++ b/cpuid_arm64.c @@ -29,12 +29,19 @@ #define CPU_UNKNOWN 0 #define CPU_ARMV8 1 +#define CPU_CORTEXA57 2 static char *cpuname[] = { - "UNKOWN", - "ARMV8" + "UNKNOWN", + "ARMV8" , + "CORTEXA57" }; +static char *cpuname_lower[] = { + "unknown", + "armv8" , + "cortexa57" +}; int get_feature(char *search) { @@ -53,13 +60,13 @@ int get_feature(char *search) { p = strchr(buffer, ':') + 2; break; - } - } + } + } - fclose(infile); + fclose(infile); - if( p == NULL ) return; + if( p == NULL ) return 0; t = strtok(p," "); while( t = strtok(NULL," ")) @@ -82,11 +89,30 @@ int detect(void) p = (char *) NULL ; infile = fopen("/proc/cpuinfo", "r"); + while (fgets(buffer, sizeof(buffer), infile)) + { + if (!strncmp("CPU part", buffer, 8)) + { + p = strchr(buffer, ':') + 2; + break; + } + } + + fclose(infile); + if(p != NULL) { + if (strstr(p, "0xd07")) { + return CPU_CORTEXA57; + } + } + + p = (char *) NULL ; + infile = fopen("/proc/cpuinfo", "r"); while (fgets(buffer, sizeof(buffer), infile)) { - if ((!strncmp("model name", buffer, 10)) || (!strncmp("Processor", buffer, 9))) + if ((!strncmp("model name", buffer, 10)) || (!strncmp("Processor", buffer, 9)) || + (!strncmp("CPU architecture", buffer, 16))) { p = strchr(buffer, ':') + 2; break; @@ -100,7 +126,7 @@ int detect(void) if (strstr(p, "AArch64")) { - return CPU_ARMV8; + return CPU_ARMV8; } @@ -118,23 +144,13 @@ char *get_corename(void) void get_architecture(void) { - printf("ARM"); + printf("ARM64"); } void get_subarchitecture(void) { int d = detect(); - switch (d) - { - - case CPU_ARMV8: - printf("ARMV8"); - break; - - default: - printf("UNKNOWN"); - break; - } + printf("%s", cpuname[d]); } void get_subdirname(void) @@ -160,26 +176,32 @@ void get_cpuconfig(void) printf("#define L2_ASSOCIATIVE 4\n"); break; - + case CPU_CORTEXA57: + printf("#define CORTEXA57\n"); + printf("#define HAVE_VFP\n"); + printf("#define HAVE_VFPV3\n"); + printf("#define HAVE_NEON\n"); + printf("#define HAVE_VFPV4\n"); + printf("#define L1_CODE_SIZE 49152\n"); + printf("#define L1_CODE_LINESIZE 64\n"); + printf("#define L1_CODE_ASSOCIATIVE 3\n"); + printf("#define L1_DATA_SIZE 32768\n"); + printf("#define L1_DATA_LINESIZE 64\n"); + printf("#define L1_DATA_ASSOCIATIVE 2\n"); + printf("#define L2_SIZE 2097152\n"); + printf("#define L2_LINESIZE 64\n"); + printf("#define L2_ASSOCIATIVE 16\n"); + break; } } void get_libname(void) { - int d = detect(); - switch (d) - { - - case CPU_ARMV8: - printf("armv8\n"); - break; - - } + printf("%s", cpuname_lower[d]); } - void get_features(void) { diff --git a/getarch.c b/getarch.c index 0a49fd1b32..024ac4b3df 100644 --- a/getarch.c +++ b/getarch.c @@ -819,10 +819,24 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \ "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " #define LIBNAME "armv8" -#define CORENAME "XGENE1" -#else +#define CORENAME "ARMV8" #endif +#ifdef FORCE_CORTEXA57 +#define FORCE +#define ARCHITECTURE "ARM64" +#define SUBARCHITECTURE "ARMV8" +#define SUBDIRNAME "arm64" +#define ARCHCONFIG "-DCORTEXA57 " \ + "-DL1_CODE_SIZE=49152 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=3 " \ + "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=2 " \ + "-DL2_SIZE=2097152 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=16 " \ + "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ + "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON" +#define LIBNAME "cortexa57" +#define CORENAME "CORTEXA57" +#else +#endif #ifndef FORCE diff --git a/kernel/arm64/KERNEL.CORTEXA57 b/kernel/arm64/KERNEL.CORTEXA57 new file mode 100644 index 0000000000..c2a370eb37 --- /dev/null +++ b/kernel/arm64/KERNEL.CORTEXA57 @@ -0,0 +1,2 @@ +include $(KERNELDIR)/KERNEL.ARMV8 + diff --git a/param.h b/param.h index 6c9ca83da9..d9bcc639ba 100644 --- a/param.h +++ b/param.h @@ -2214,6 +2214,46 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ZGEMM_DEFAULT_R 4096 +#define SYMV_P 16 +#endif + + +#if defined(CORTEXA57) +#define SNUMOPT 2 +#define DNUMOPT 2 + +#define GEMM_DEFAULT_OFFSET_A 0 +#define GEMM_DEFAULT_OFFSET_B 0 +#define GEMM_DEFAULT_ALIGN 0x03fffUL + +#define SGEMM_DEFAULT_UNROLL_M 4 +#define SGEMM_DEFAULT_UNROLL_N 4 + +#define DGEMM_DEFAULT_UNROLL_M 2 +#define DGEMM_DEFAULT_UNROLL_N 2 + +#define CGEMM_DEFAULT_UNROLL_M 2 +#define CGEMM_DEFAULT_UNROLL_N 2 + +#define ZGEMM_DEFAULT_UNROLL_M 2 +#define ZGEMM_DEFAULT_UNROLL_N 2 + +#define SGEMM_DEFAULT_P 128 +#define DGEMM_DEFAULT_P 512 +#define CGEMM_DEFAULT_P 96 +#define ZGEMM_DEFAULT_P 64 + +#define SGEMM_DEFAULT_Q 240 +#define DGEMM_DEFAULT_Q 480 +#define CGEMM_DEFAULT_Q 120 +#define ZGEMM_DEFAULT_Q 120 + +#define SGEMM_DEFAULT_R 12288 +#define DGEMM_DEFAULT_R 8192 +#define CGEMM_DEFAULT_R 4096 +#define ZGEMM_DEFAULT_R 4096 + + #define SYMV_P 16 #endif