torch · elikosan · May 20, 2016 · May 20, 2016 · May 20, 2016 · May 27, 2016
diff --git a/File.lua b/File.lua
@@ -376,15 +376,27 @@ function File:readObject()
    end
 end
 
 -- simple helpers to save/load arbitrary objects/tables
+--
+-- torch.save(filename, object [, mode [, referenced]])
+--   mode:       nil or 'binary' (default), 'ascii', or 'b32' / 'b64', which write
+--               binary after calling file:longSize(4) / file:longSize(8).
+--   referenced: nil or true (default), or false; passed to file:referenced().
 function torch.save(filename, object, mode, referenced)
-   assert(mode == nil or mode == 'binary' or mode == 'ascii', '"binary" or "ascii" (or nil) expected for mode')
+   assert(mode == nil or mode == 'binary' or mode == 'b32' or mode == 'b64' or mode == 'ascii', '"binary", "b32", "b64" or "ascii" (or nil) expected for mode')
    assert(referenced == nil or referenced == true or referenced == false, 'true or false (or nil) expected for referenced')
+   local longSize
+   if mode == 'b32' or mode == 'b64' then
+      -- digits in the mode name are bits; longSize() takes the width divided by 8 (32 -> 4, 64 -> 8)
+      longSize = tonumber(mode:match('%d+')) / 8
+      mode = 'binary'
+   end
    mode = mode or 'binary'
    referenced = referenced == nil and true or referenced
    local file = torch.DiskFile(filename, 'w')
    file[mode](file)
    file:referenced(referenced)
+   if longSize then file:longSize(longSize) end
    file:writeObject(object)
    file:close()
 end

diff --git a/lib/TH/THGeneral.h.in b/lib/TH/THGeneral.h.in
@@ -14,6 +14,7 @@
 #cmakedefine USE_BLAS
 #cmakedefine USE_LAPACK
 #cmakedefine BLAS_F2C
+#cmakedefine MKL_ILP64
 
 #ifdef __cplusplus
 # define TH_EXTERNC extern "C"

diff --git a/lib/TH/THLapack.h b/lib/TH/THLapack.h
@@ -21,6 +21,17 @@ if (info < 0) {                                                     \
   THError(fmt, func, info, ##__VA_ARGS__);                          \
 }
 
+#ifdef MKL_ILP64
+/* MKL ILP64 interface: use a 64-bit integer type for LAPACK integer arguments */
+#if !defined(__INTEL_COMPILER) && defined(_MSC_VER)
+#define LAPACK_INT __int64
+#else
+#define LAPACK_INT long long int
+#endif
+#else
+#define LAPACK_INT int
+#endif
+
 #include "generic/THLapack.h"
 #include "THGenerateAllTypes.h"
 

diff --git a/lib/TH/cmake/FindBLAS.cmake b/lib/TH/cmake/FindBLAS.cmake
@@ -242,32 +242,70 @@ endif()
 # Determine if blas was compiled with the f2c conventions
 IF (BLAS_LIBRARIES)
   SET(CMAKE_REQUIRED_LIBRARIES ${BLAS_LIBRARIES})
-  CHECK_C_SOURCE_RUNS("
+  IF (MKL_ILP64)
+	SET(CMAKE_REQUIRED_DEFINITIONS -DMKL_ILP64)
+	MESSAGE(STATUS "Checking F2C with MKL ILP64 ${CMAKE_REQUIRED_DEFINITIONS}")
+  ENDIF(MKL_ILP64)
+
+  set(f2c_code_d "
 #include <stdlib.h>
 #include <stdio.h>
 float x[4] = { 1, 2, 3, 4 };
 float y[4] = { .1, .01, .001, .0001 };
-int four = 4;
-int one = 1;
+#ifdef MKL_ILP64
+  #if (!defined(__INTEL_COMPILER)) & defined(_MSC_VER) 
+    #define BLAS_INT __int64
+  #else
+    #define BLAS_INT long long
+ #endif
+#else
+  #define BLAS_INT int
+#endif
+BLAS_INT four = 4;
+BLAS_INT one = 1;
 extern double sdot_();
 int main() {
-  int i;
   double r = sdot_(&four, x, &one, y, &one);
   exit((float)r != (float).1234);
-}" BLAS_F2C_DOUBLE_WORKS )
-  CHECK_C_SOURCE_RUNS("
+}" )
+
+  CHECK_C_SOURCE_COMPILES("${f2c_code_d}" BLAS_F2C_DOUBLE_COMPILES )
+  IF (NOT BLAS_F2C_DOUBLE_COMPILES)
+    MESSAGE(STATUS "Warning F2C double check did not compile!!")
+	MESSAGE(STATUS "${f2c_code_d}")
+  ENDIF(NOT BLAS_F2C_DOUBLE_COMPILES)
+
+  CHECK_C_SOURCE_RUNS("${f2c_code_d}" BLAS_F2C_DOUBLE_WORKS )
+
+  set(f2c_code_f "
 #include <stdlib.h>
 #include <stdio.h>
 float x[4] = { 1, 2, 3, 4 };
 float y[4] = { .1, .01, .001, .0001 };
-int four = 4;
-int one = 1;
+#ifdef MKL_ILP64
+  #if (!defined(__INTEL_COMPILER)) & defined(_MSC_VER) 
+    #define BLAS_INT __int64
+  #else
+    #define BLAS_INT long long
+ #endif
+#else
+  #define BLAS_INT int
+#endif
+BLAS_INT four = 4;
+BLAS_INT one = 1;
 extern float sdot_();
 int main() {
-  int i;
   double r = sdot_(&four, x, &one, y, &one);
   exit((float)r != (float).1234);
-}" BLAS_F2C_FLOAT_WORKS )
+}" )
+
+  CHECK_C_SOURCE_COMPILES("${f2c_code_f}" BLAS_F2C_FLOAT_COMPILES )
+  IF (NOT BLAS_F2C_FLOAT_COMPILES)
+    MESSAGE(STATUS "Warning F2C float check did not compile!!")
+  ENDIF(NOT BLAS_F2C_FLOAT_COMPILES)
+
+  CHECK_C_SOURCE_RUNS("${f2c_code_f}" BLAS_F2C_FLOAT_WORKS )
+
   IF (BLAS_F2C_DOUBLE_WORKS AND NOT BLAS_F2C_FLOAT_WORKS)
     MESSAGE(STATUS "This BLAS uses the F2C return conventions")
     SET(BLAS_F2C TRUE)

diff --git a/lib/TH/cmake/FindMKL.cmake b/lib/TH/cmake/FindMKL.cmake
@@ -29,19 +29,27 @@ INCLUDE(CheckTypeSize)
 INCLUDE(CheckFunctionExists)
 
 # Intel Compiler Suite
-SET(INTEL_COMPILER_DIR CACHE STRING
+SET(INTEL_COMPILER_DIR $ENV{INTEL_COMPILER_DIR} CACHE STRING
   "Root directory of the Intel Compiler Suite (contains ipp, mkl, etc.)")
-SET(INTEL_MKL_DIR CACHE STRING
+SET(INTEL_MKL_DIR $ENV{INTEL_MKL_DIR} CACHE STRING
   "Root directory of the Intel MKL (standalone)")
+SET(MKL_ILP64 $ENV{MKL_ILP64} CACHE STRING
+  "Link with the 64-bit integer version of MKL (_ilp64 instead of _lp64)")
 SET(INTEL_MKL_SEQUENTIAL OFF CACHE BOOL
   "Force using the sequential (non threaded) libraries")
 
+MESSAGE(STATUS "INTEL_MKL_DIR: ${INTEL_MKL_DIR}")
+
 # Checks
 CHECK_TYPE_SIZE("void*" SIZE_OF_VOIDP)
 IF ("${SIZE_OF_VOIDP}" EQUAL 8)
-  SET(mklvers "em64t")
+  SET(mklvers "intel64")
   SET(iccvers "intel64")
-  SET(mkl64s "_lp64")
+  IF (MKL_ILP64)
+	SET(mkl64s "_ilp64")
+  ELSE(MKL_ILP64)
+	SET(mkl64s "_lp64")
+  ENDIF(MKL_ILP64)
 ELSE ("${SIZE_OF_VOIDP}" EQUAL 8)
   SET(mklvers "32")
   SET(iccvers "ia32")
@@ -80,15 +88,26 @@ ENDIF (INTEL_COMPILER_DIR)
 IF (INTEL_MKL_DIR)
   # TODO: diagnostic if dir does not exist
   SET(CMAKE_INCLUDE_PATH ${CMAKE_INCLUDE_PATH}
-    "${INTEL_MKL_DIR}/include")
+    "${INTEL_MKL_DIR}/include/")
   SET(CMAKE_LIBRARY_PATH ${CMAKE_LIBRARY_PATH}
-    "${INTEL_MKL_DIR}/lib/${mklvers}")
+    "${INTEL_MKL_DIR}/lib/${mklvers}/")
   IF (MSVC)
     SET(CMAKE_LIBRARY_PATH ${CMAKE_LIBRARY_PATH}
-      "${INTEL_MKL_DIR}/lib/${iccvers}")
+      "${INTEL_MKL_DIR}/lib/${iccvers}/")
   ENDIF (MSVC)
 ENDIF (INTEL_MKL_DIR)
 
+# lib prefix
+IF (MSVC)
+  SET(CMAKE_FIND_LIBRARY_PREFIXES "")
+  SET(CMAKE_FIND_LIBRARY_SUFFIXES ".lib" ".dll")
+ELSE(MSVC)
+  SET(CMAKE_FIND_LIBRARY_PREFIXES "lib")
+  SET(CMAKE_FIND_LIBRARY_SUFFIXES ".so" ".a")
+ENDIF (MSVC)
+
+MESSAGE(STATUS "Searching for MKL in ${CMAKE_LIBRARY_PATH} ...")
+
 # Try linking multiple libs
 MACRO(CHECK_ALL_LIBRARIES LIBRARIES _name _list _flags)
   # This macro checks for the existence of the combination of libraries given by _list.
@@ -258,9 +277,15 @@ ENDIF (MKL_LIBRARIES)
 IF(NOT MKL_FOUND AND MKL_FIND_REQUIRED)
   MESSAGE(FATAL_ERROR "MKL library not found. Please specify library  location")
 ENDIF(NOT MKL_FOUND AND MKL_FIND_REQUIRED)
+
+
 IF(NOT MKL_FIND_QUIETLY)
   IF(MKL_FOUND)
-    MESSAGE(STATUS "MKL library found")
+	IF (mkl64s)
+	  MESSAGE(STATUS "MKL 64bit library found: ${mkl64s}")
+	ELSE(mkl64s)
+	  MESSAGE(STATUS "MKL 32bit library found: ${mkl64s}")
+	ENDIF(mkl64s)
   ELSE(MKL_FOUND)
     MESSAGE(STATUS "MKL library not found")
   ENDIF(MKL_FOUND)

diff --git a/lib/TH/generic/THBlas.c b/lib/TH/generic/THBlas.c
@@ -9,24 +9,35 @@
 # define ffloat float
 #endif
 
-TH_EXTERNC void dswap_(int *n, double *x, int *incx, double *y, int *incy);
-TH_EXTERNC void sswap_(int *n, float *x, int *incx, float *y, int *incy);
-TH_EXTERNC void dscal_(int *n, double *a, double *x, int *incx);
-TH_EXTERNC void sscal_(int *n, float *a, float *x, int *incx);
-TH_EXTERNC void dcopy_(int *n, double *x, int *incx, double *y, int *incy);
-TH_EXTERNC void scopy_(int *n, float *x, int *incx, float *y, int *incy);
-TH_EXTERNC void daxpy_(int *n, double *a, double *x, int *incx, double *y, int *incy);
-TH_EXTERNC void saxpy_(int *n, float *a, float *x, int *incx, float *y, int *incy);
-TH_EXTERNC double ddot_(int *n, double *x, int *incx, double *y, int *incy);
-TH_EXTERNC ffloat sdot_(int *n, float *x, int *incx, float *y, int *incy);
-TH_EXTERNC void dgemv_(char *trans, int *m, int *n, double *alpha, double *a, int *lda, double *x, int *incx, double *beta, double *y, int *incy);
-TH_EXTERNC void sgemv_(char *trans, int *m, int *n, float *alpha, float *a, int *lda, float *x, int *incx, float *beta, float *y, int *incy);
-TH_EXTERNC void dger_(int *m, int *n, double *alpha, double *x, int *incx, double *y, int *incy, double *a, int *lda);
-TH_EXTERNC void sger_(int *m, int *n, float *alpha, float *x, int *incx, float *y, int *incy, float *a, int *lda);
-TH_EXTERNC void dgemm_(char *transa, char *transb, int *m, int *n, int *k, double *alpha, double *a, int *lda, double *b, int *ldb, double *beta, double *c, int *ldc);
-TH_EXTERNC void sgemm_(char *transa, char *transb, int *m, int *n, int *k, float *alpha, float *a, int *lda, float *b, int *ldb, float *beta, float *c, int *ldc);
+#ifdef MKL_ILP64
+  /* MKL ILP64 interface: use a 64-bit integer type for BLAS integer arguments */
+  #if !defined(__INTEL_COMPILER) && defined(_MSC_VER)
+    #define BLAS_INT __int64
+  #else
+    #define BLAS_INT long long int
+  #endif
+#else
+  #define BLAS_INT int
+#endif
 
 
+TH_EXTERNC void dswap_(BLAS_INT *n, double *x, BLAS_INT *incx, double *y, BLAS_INT *incy);
+TH_EXTERNC void sswap_(BLAS_INT *n, float *x, BLAS_INT *incx, float *y, BLAS_INT *incy);
+TH_EXTERNC void dscal_(BLAS_INT *n, double *a, double *x, BLAS_INT *incx);
+TH_EXTERNC void sscal_(BLAS_INT *n, float *a, float *x, BLAS_INT *incx);
+TH_EXTERNC void dcopy_(BLAS_INT *n, double *x, BLAS_INT *incx, double *y, BLAS_INT *incy);
+TH_EXTERNC void scopy_(BLAS_INT *n, float *x, BLAS_INT *incx, float *y, BLAS_INT *incy);
+TH_EXTERNC void daxpy_(BLAS_INT *n, double *a, double *x, BLAS_INT *incx, double *y, BLAS_INT *incy);
+TH_EXTERNC void saxpy_(BLAS_INT *n, float *a, float *x, BLAS_INT *incx, float *y, BLAS_INT *incy);
+TH_EXTERNC double ddot_(BLAS_INT *n, double *x, BLAS_INT *incx, double *y, BLAS_INT *incy);
+TH_EXTERNC ffloat sdot_(BLAS_INT *n, float *x, BLAS_INT *incx, float *y, BLAS_INT *incy);
+TH_EXTERNC void dgemv_(char *trans, BLAS_INT *m, BLAS_INT *n, double *alpha, double *a, BLAS_INT *lda, double *x, BLAS_INT *incx, double *beta, double *y, BLAS_INT *incy);
+TH_EXTERNC void sgemv_(char *trans, BLAS_INT *m, BLAS_INT *n, float *alpha, float *a, BLAS_INT *lda, float *x, BLAS_INT *incx, float *beta, float *y, BLAS_INT *incy);
+TH_EXTERNC void dger_(BLAS_INT *m, BLAS_INT *n, double *alpha, double *x, BLAS_INT *incx, double *y, BLAS_INT *incy, double *a, BLAS_INT *lda);
+TH_EXTERNC void sger_(BLAS_INT *m, BLAS_INT *n, float *alpha, float *x, BLAS_INT *incx, float *y, BLAS_INT *incy, float *a, BLAS_INT *lda);
+TH_EXTERNC void dgemm_(char *transa, char *transb, BLAS_INT *m, BLAS_INT *n, BLAS_INT *k, double *alpha, double *a, BLAS_INT *lda, double *b, BLAS_INT *ldb, double *beta, double *c, BLAS_INT *ldc);
+TH_EXTERNC void sgemm_(char *transa, char *transb, BLAS_INT *m, BLAS_INT *n, BLAS_INT *k, float *alpha, float *a, BLAS_INT *lda, float *b, BLAS_INT *ldb, float *beta, float *c, BLAS_INT *ldc);
+
 
 void THBlas_(swap)(long n, real *x, long incx, real *y, long incy)
 {
@@ -39,9 +50,9 @@ void THBlas_(swap)(long n, real *x, long incx, real *y, long incy)
 #if defined(USE_BLAS) && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT))
   if( (n <= INT_MAX) && (incx <= INT_MAX) && (incy <= INT_MAX) )
   {
-    int i_n = (int)n;
-    int i_incx = (int)incx;
-    int i_incy = (int)incy;
+    BLAS_INT i_n = (BLAS_INT)n;
+    BLAS_INT i_incx = (BLAS_INT)incx;
+    BLAS_INT i_incy = (BLAS_INT)incy;
 
 #if defined(TH_REAL_IS_DOUBLE)
     dswap_(&i_n, x, &i_incx, y, &i_incy);
@@ -70,8 +81,8 @@ void THBlas_(scal)(long n, real a, real *x, long incx)
 #if defined(USE_BLAS) && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT))
   if( (n <= INT_MAX) && (incx <= INT_MAX) )
   {
-    int i_n = (int)n;
-    int i_incx = (int)incx;
+    BLAS_INT i_n = (BLAS_INT)n;
+    BLAS_INT i_incx = (BLAS_INT)incx;
 
 #if defined(TH_REAL_IS_DOUBLE)
     dscal_(&i_n, &a, x, &i_incx);
@@ -83,13 +94,8 @@ void THBlas_(scal)(long n, real a, real *x, long incx)
 #endif
   {
     long i;
-    for(i = 0; i < n; i++) {
-      if (a == 0) {
-        x[i*incx] = 0;
-      } else {
-        x[i*incx] *= a;
-      }
-    }
+    for(i = 0; i < n; i++)
+      x[i*incx] = (a == 0) ? 0 : x[i*incx] * a; /* a == 0 must store 0 even if x[i] is NaN/Inf */
   }
 }
 
@@ -104,9 +110,9 @@ void THBlas_(copy)(long n, real *x, long incx, real *y, long incy)
 #if defined(USE_BLAS) && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT))
   if( (n <= INT_MAX) && (incx <= INT_MAX) && (incy <= INT_MAX) )
   {
-    int i_n = (int)n;
-    int i_incx = (int)incx;
-    int i_incy = (int)incy;
+    BLAS_INT i_n = (BLAS_INT)n;
+    BLAS_INT i_incx = (BLAS_INT)incx;
+    BLAS_INT i_incy = (BLAS_INT)incy;
 
 #if defined(TH_REAL_IS_DOUBLE)
     dcopy_(&i_n, x, &i_incx, y, &i_incy);
@@ -134,9 +140,9 @@ void THBlas_(axpy)(long n, real a, real *x, long incx, real *y, long incy)
 #if defined(USE_BLAS) && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT))
   if( (n <= INT_MAX) && (incx <= INT_MAX) && (incy <= INT_MAX) )
   {
-    int i_n = (int)n;
-    int i_incx = (int)incx;
-    int i_incy = (int)incy;
+    BLAS_INT i_n = (BLAS_INT)n;
+    BLAS_INT i_incx = (BLAS_INT)incx;
+    BLAS_INT i_incy = (BLAS_INT)incy;
 
 #if defined(TH_REAL_IS_DOUBLE)
     daxpy_(&i_n, &a, x, &i_incx, y, &i_incy);
@@ -164,9 +170,9 @@ real THBlas_(dot)(long n, real *x, long incx, real *y, long incy)
 #if defined(USE_BLAS) && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT))
   if( (n <= INT_MAX) && (incx <= INT_MAX) && (incy <= INT_MAX) )
   {
-    int i_n = (int)n;
-    int i_incx = (int)incx;
-    int i_incy = (int)incy;
+    BLAS_INT i_n = (BLAS_INT)n;
+    BLAS_INT i_incx = (BLAS_INT)incx;
+    BLAS_INT i_incy = (BLAS_INT)incy;
 
 #if defined(TH_REAL_IS_DOUBLE)
     return (real) ddot_(&i_n, x, &i_incx, y, &i_incy);
@@ -195,11 +201,11 @@ void THBlas_(gemv)(char trans, long m, long n, real alpha, real *a, long lda, re
       (incx > 0) && (incx <= INT_MAX) &&
       (incy > 0) && (incy <= INT_MAX) )
   {
-    int i_m = (int)m;
-    int i_n = (int)n;
-    int i_lda = (int)lda;
-    int i_incx = (int)incx;
-    int i_incy = (int)incy;
+    BLAS_INT i_m = (BLAS_INT)m;
+    BLAS_INT i_n = (BLAS_INT)n;
+    BLAS_INT i_lda = (BLAS_INT)lda;
+    BLAS_INT i_incx = (BLAS_INT)incx;
+    BLAS_INT i_incy = (BLAS_INT)incy;
 
 #if defined(TH_REAL_IS_DOUBLE)
     dgemv_(&trans, &i_m, &i_n, &alpha, a, &i_lda, x, &i_incx, &beta, y, &i_incy);
@@ -250,11 +256,11 @@ void THBlas_(ger)(long m, long n, real alpha, real *x, long incx, real *y, long
 #if defined(USE_BLAS) && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT))
   if( (m <= INT_MAX) && (n <= INT_MAX) && (lda <= INT_MAX)  && (incx <= INT_MAX) && (incy <= INT_MAX) )
   {
-    int i_m = (int)m;
-    int i_n = (int)n;
-    int i_lda = (int)lda;
-    int i_incx = (int)incx;
-    int i_incy = (int)incy;
+    BLAS_INT i_m = (BLAS_INT)m;
+    BLAS_INT i_n = (BLAS_INT)n;
+    BLAS_INT i_lda = (BLAS_INT)lda;
+    BLAS_INT i_incx = (BLAS_INT)incx;
+    BLAS_INT i_incy = (BLAS_INT)incy;
 
 #if defined(TH_REAL_IS_DOUBLE)
     dger_(&i_m, &i_n, &alpha, x, &i_incx, y, &i_incy, a, &i_lda);
@@ -309,12 +315,12 @@ void THBlas_(gemm)(char transa, char transb, long m, long n, long k, real alpha,
 #if defined(USE_BLAS) && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT))
   if( (m <= INT_MAX) && (n <= INT_MAX) && (k <= INT_MAX) && (lda <= INT_MAX)  && (ldb <= INT_MAX) && (ldc <= INT_MAX) )
   {
-    int i_m = (int)m;
-    int i_n = (int)n;
-    int i_k = (int)k;
-    int i_lda = (int)lda;
-    int i_ldb = (int)ldb;
-    int i_ldc = (int)ldc;
+    BLAS_INT i_m = (BLAS_INT)m;
+    BLAS_INT i_n = (BLAS_INT)n;
+    BLAS_INT i_k = (BLAS_INT)k;
+    BLAS_INT i_lda = (BLAS_INT)lda;
+    BLAS_INT i_ldb = (BLAS_INT)ldb;
+    BLAS_INT i_ldc = (BLAS_INT)ldc;
 
 #if defined(TH_REAL_IS_DOUBLE)
     dgemm_(&transa, &transb, &i_m, &i_n, &i_k, &alpha, a, &i_lda, b, &i_ldb, &beta, c, &i_ldc);