piiswrong closed pull request #8577: support for lapack functions with mkl
URL: https://github.com/apache/incubator-mxnet/pull/8577
 
 
   

This is a PR merged from a forked repository. Because GitHub hides the original
diff of a foreign (fork) pull request once it is merged, the diff is reproduced
below for the sake of provenance:

diff --git a/make/config.mk b/make/config.mk
index d47d4d6931..a4774f0da8 100644
--- a/make/config.mk
+++ b/make/config.mk
@@ -105,6 +105,12 @@ USE_LAPACK = 1
 # path to lapack library in case of a non-standard installation
 USE_LAPACK_PATH =
 
+# by default, disable lapack when using MKL
+# switch on when there is a full installation of MKL available (not just MKL2017/MKL_ML)
+ifeq ($(USE_BLAS), mkl)
+USE_LAPACK = 0
+endif
+
 # add path to intel library, you may need it for MKL, if you did not add the path
 # to environment variable
 USE_INTEL_PATH = NONE
diff --git a/make/osx.mk b/make/osx.mk
index d9ce6f2df8..b17b04cfdb 100644
--- a/make/osx.mk
+++ b/make/osx.mk
@@ -67,6 +67,12 @@ USE_BLAS = apple
 # only effective when compiled with blas versions openblas/apple/atlas/mkl
 USE_LAPACK = 1
 
+# by default, disable lapack when using MKL
+# switch on when there is a full installation of MKL available (not just MKL2017/MKL_ML)
+ifeq ($(USE_BLAS), mkl)
+USE_LAPACK = 0
+endif
+
 # add path to intel library, you may need it for MKL, if you did not add the path
 # to environment variable
 USE_INTEL_PATH = NONE
diff --git a/src/operator/c_lapack_api.h b/src/operator/c_lapack_api.h
index 53b0bf258a..293c3f2f81 100644
--- a/src/operator/c_lapack_api.h
+++ b/src/operator/c_lapack_api.h
@@ -70,6 +70,9 @@
 
 using namespace mshadow;
 
+// Will cause clash with MKL fortran layer headers
+#if MSHADOW_USE_MKL == 0
+
 extern "C" {
 
   // Fortran signatures
@@ -118,15 +121,14 @@ extern "C" {
   MXNET_LAPACK_FSIG_SYEVD(dsyevd, double)
 }
 
-#define MXNET_LAPACK_ROW_MAJOR 101
-#define MXNET_LAPACK_COL_MAJOR 102
+#endif  // MSHADOW_USE_MKL == 0
+
 
 #define CHECK_LAPACK_UPLO(a) \
   CHECK(a == 'U' || a == 'L') << "neither L nor U specified as triangle in lapack call";
 
 inline char loup(char uplo, bool invert) { return invert ? (uplo == 'U' ? 'L' : 'U') : uplo; }
 
-
 /*!
  * \brief Transpose matrix data in memory
  *
@@ -160,7 +162,75 @@ inline void flip<cpu, double>(int m, int n,
 }
 
 
-#if MXNET_USE_LAPACK
+#if (MSHADOW_USE_MKL && MXNET_USE_LAPACK)
+
+  // We interface with the C-interface of MKL
+  // as this is the preferred way.
+  #include <mkl_lapacke.h>
+
+  #define MXNET_LAPACK_ROW_MAJOR LAPACK_ROW_MAJOR
+  #define MXNET_LAPACK_COL_MAJOR LAPACK_COL_MAJOR
+
+  // These functions already have matching signatures.
+  #define MXNET_LAPACK_spotrf LAPACKE_spotrf
+  #define MXNET_LAPACK_dpotrf LAPACKE_dpotrf
+  #define MXNET_LAPACK_spotri LAPACKE_spotri
+  #define MXNET_LAPACK_dpotri LAPACKE_dpotri
+  #define mxnet_lapack_sposv  LAPACKE_sposv
+  #define mxnet_lapack_dposv  LAPACKE_dposv
+
+  // The following functions differ in signature from the
+  // MXNET_LAPACK-signature and have to be wrapped.
+  #define MXNET_LAPACK_CWRAP_GELQF(prefix, dtype) \
+  inline int MXNET_LAPACK_##prefix##gelqf(int matrix_layout, int m, int n, \
+                                          dtype *a, int lda, dtype* tau, \
+                                          dtype* work, int lwork) { \
+    if (lwork != -1) { \
+      return LAPACKE_##prefix##gelqf(matrix_layout, m, n, a, lda, tau); \
+    } \
+    *work = 0; \
+    return 0; \
+  }
+  MXNET_LAPACK_CWRAP_GELQF(s, float)
+  MXNET_LAPACK_CWRAP_GELQF(d, double)
+
+  #define MXNET_LAPACK_CWRAP_ORGLQ(prefix, dtype) \
+  inline int MXNET_LAPACK_##prefix##orglq(int matrix_layout, int m, int n, \
+                                          dtype *a, int lda, dtype* tau, \
+                                          dtype* work, int lwork) { \
+    if (lwork != -1) { \
+      return LAPACKE_##prefix##orglq(matrix_layout, m, n, m, a, lda, tau); \
+    } \
+    *work = 0; \
+    return 0; \
+  }
+  MXNET_LAPACK_CWRAP_ORGLQ(s, float)
+  MXNET_LAPACK_CWRAP_ORGLQ(d, double)
+
+  // This has to be called internally in COL_MAJOR format even when matrix_layout
+  // is row-major as otherwise the eigenvectors would be returned as cols in a
+  // row-major matrix layout (see MKL documentation).
+  // We also have to allocate at least one DType element as workspace as the
+  // calling code assumes that the workspace has at least that size.
+  #define MXNET_LAPACK_CWRAP_SYEVD(prefix, dtype) \
+  inline int MXNET_LAPACK_##prefix##syevd(int matrix_layout, char uplo, int n, dtype *a, \
+                                          int lda, dtype *w, dtype *work, int lwork, \
+                                          int *iwork, int liwork) { \
+    if (lwork != -1) { \
+      char o(loup(uplo, (matrix_layout == MXNET_LAPACK_ROW_MAJOR))); \
+      return LAPACKE_##prefix##syevd(LAPACK_COL_MAJOR, 'V', o, n, a, lda, w); \
+    } \
+    *work = 1; \
+    *iwork = 0; \
+    return 0; \
+  }
+  MXNET_LAPACK_CWRAP_SYEVD(s, float)
+  MXNET_LAPACK_CWRAP_SYEVD(d, double)
+
+#elif MXNET_USE_LAPACK
+
+  #define MXNET_LAPACK_ROW_MAJOR 101
+  #define MXNET_LAPACK_COL_MAJOR 102
 
   // These functions can be called with either row- or col-major format.
   #define MXNET_LAPACK_CWRAPPER1(func, dtype) \
@@ -271,6 +341,9 @@ inline void flip<cpu, double>(int m, int n,
      " Ensure that lapack library is installed and build with USE_LAPACK=1 to 
get lapack" \
      " functionalities.")
 
+  #define MXNET_LAPACK_ROW_MAJOR 101
+  #define MXNET_LAPACK_COL_MAJOR 102
+
   // Define compilable stubs.
   #define MXNET_LAPACK_CWRAPPER1(func, dtype) \
   inline int MXNET_LAPACK_##func(int matrix_layout, char uplo, int n, dtype* a, int lda) { \
diff --git a/src/operator/linalg_impl.h b/src/operator/linalg_impl.h
index b1b35cfeac..b3e6573f78 100644
--- a/src/operator/linalg_impl.h
+++ b/src/operator/linalg_impl.h
@@ -56,7 +56,7 @@ inline void check_gemm(const Tensor<xpu, 2, DType>& A, const Tensor<xpu, 2, DTyp
     << "Non compatible matrix dimensions between inputs A and B for gemm";
 }
 
-#if MSHADOW_USE_CBLAS == 1
+#if (MSHADOW_USE_CBLAS == 1 || MSHADOW_USE_MKL == 1)
 
 #define LINALG_CPU_GEMM(fname, DType) \
 template<> inline \
@@ -98,7 +98,7 @@ void linalg_batch_gemm<cpu, DType>(const Tensor<cpu, 3, DType>& A, const Tensor<
   LOG(FATAL) << "linalg_batch_gemm not implemented by mxnet for cpu, needs cblas!"; \
 }
 
-#endif  // MSHADOW_USE_CBLAS == 1
+#endif  // MSHADOW_USE_CBLAS == 1 || MSHADOW_USE_MKL == 1
 
 LINALG_CPU_GEMM(sgemm, float)
 LINALG_CPU_GEMM(dgemm, double)
@@ -253,7 +253,7 @@ inline void check_trsm(const Tensor<xpu, 2, DType>& A, const Tensor<xpu, 2, DTyp
     << "Non compatible matrix dimensions between inputs A and B for trsm";
 }
 
-#if MSHADOW_USE_CBLAS == 1
+#if (MSHADOW_USE_CBLAS == 1 || MSHADOW_USE_MKL == 1)
 
 #define LINALG_CPU_TRSM(fname, DType) \
 template<> inline \
@@ -292,7 +292,7 @@ void linalg_batch_trsm<cpu, DType>(const Tensor<cpu, 3, DType>& A, const Tensor<
   LOG(FATAL) << "linalg_batch_trsm not implemented, needs cblas!"; \
 }
 
-#endif  // MSHADOW_USE_CBLAS == 1
+#endif  // MSHADOW_USE_CBLAS == 1 || MSHADOW_USE_MKL == 1
 
 LINALG_CPU_TRSM(strsm, float)
 LINALG_CPU_TRSM(dtrsm, double)
@@ -389,7 +389,7 @@ inline void linalg_gemm(const Tensor<xpu, 2, DType>& A,
   }
 }
 
-#if MSHADOW_USE_CBLAS == 0
+#if (MSHADOW_USE_CBLAS == 0 && MSHADOW_USE_MKL == 0)
 
 // A template for a cpu linalg_gemm implementation using mshadow::dot()
 #define LINALG_CPU_GEMM_NO_CBLAS(DType) \
@@ -443,7 +443,7 @@ void linalg_gemm<cpu, DType>(const Tensor<cpu, 2, DType>& A, \
 LINALG_CPU_GEMM_NO_CBLAS(float)
 LINALG_CPU_GEMM_NO_CBLAS(double)
 
-#endif  // (MSHADOW_USE_CBLAS == 0)
+#endif  // (MSHADOW_USE_CBLAS == 0 && MSHADOW_USE_MKL == 0)
 
 //////////////////////////////// TRMM ////////////////////////////////////////////
 
@@ -463,7 +463,7 @@ inline void check_trmm(const Tensor<xpu, 2, DType>& A, const Tensor<xpu, 2, DTyp
     << "Non compatible matrix dimensions between inputs A and B for trmm";
 }
 
-#if MSHADOW_USE_CBLAS == 1
+#if (MSHADOW_USE_CBLAS == 1 || MSHADOW_USE_MKL == 1)
 
 #define LINALG_CPU_TRMM(fname, DType) \
 template<> inline \
@@ -485,7 +485,7 @@ void linalg_trmm<cpu, DType>(const Tensor<cpu, 2, DType>& A, const Tensor<cpu, 2
   LOG(FATAL) << "linalg_trmm not implemented, needs cblas!"; \
 }
 
-#endif  // MSHADOW_USE_CBLAS == 1
+#endif  // MSHADOW_USE_CBLAS == 1 || MSHADOW_USE_MKL == 1
 
 #define LINALG_XPU_BATCH_TRMM(xpu, DType) \
 template<> inline \
@@ -735,7 +735,7 @@ void check_syrk(const Tensor<xpu, 2, DType>& A, const Tensor<xpu, 2, DType>& B,
     << "Non compatible matrix dimensions between inputs A and B for syrk";
 }
 
-#if MSHADOW_USE_CBLAS == 1
+#if (MSHADOW_USE_CBLAS == 1 || MSHADOW_USE_MKL == 1)
 
 #define LINALG_CPU_SYRK(fname, DType) \
 template<> inline \
@@ -758,7 +758,7 @@ void linalg_syrk<cpu, DType>(const Tensor<cpu, 2, DType>& A, \
   LOG(FATAL) << "linalg_syrk not implemented by mxnet for cpu, needs cblas!"; \
 }
 
-#endif  // MSHADOW_USE_CBLAS == 1
+#endif  // MSHADOW_USE_CBLAS == 1 || MSHADOW_USE_MKL == 1
 
 #define LINALG_XPU_BATCH_SYRK(xpu, DType) \
 template<> inline \
@@ -811,7 +811,7 @@ void check_gelqf(const Tensor<xpu, 2, DType>& A,
   // Any checking that helps user debug potential problems.
   CHECK_LE(A.size(0), A.size(1))
     << "A must have num(rows) <= num(columns)";
-  CHECK_LT(A.size(0), work.size(0))
+  CHECK_LE(A.size(0), work.size(0))
     << "Size of work is too small";
 }
 
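For reference, here is a minimal usage sketch (not part of the PR) of how calling
code is expected to drive the wrapped LAPACK entry points via the usual two-phase
workspace query. It assumes a build with USE_BLAS=mkl, USE_LAPACK=1 and a full MKL
installation, with src/operator/ on the include path; the helper name lq_factor is
hypothetical.

// Illustration only: LQ-factorize a row-major m x n matrix (m <= n) in place.
#include <vector>
#include "c_lapack_api.h"

inline void lq_factor(float* a, int m, int n) {
  std::vector<float> tau(m);  // scalar factors of the elementary reflectors
  // Phase 1: workspace query (lwork == -1). With the MKL wrappers above this
  // simply reports 0, since LAPACKE manages its workspace internally.
  float wsize = 0.0f;
  MXNET_LAPACK_sgelqf(MXNET_LAPACK_ROW_MAJOR, m, n, a, n, tau.data(), &wsize, -1);
  // Phase 2: the actual factorization; lda == n for a row-major m x n matrix.
  std::vector<float> work(static_cast<int>(wsize) + 1);
  MXNET_LAPACK_sgelqf(MXNET_LAPACK_ROW_MAJOR, m, n, a, n, tau.data(),
                      work.data(), static_cast<int>(work.size()));
}

The syevd wrapper follows the same convention but additionally flips the requested
triangle via loup() and always calls LAPACKE_*syevd in column-major mode, so that
the eigenvectors are returned in the layout the calling code expects (see the
comment above that wrapper in the diff).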


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services
