This is an automated email from the ASF dual-hosted git repository.

lausen pushed a commit to branch v1.6.x
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/v1.6.x by this push:
     new 4da66fa  Backport #17596 (#17603)
4da66fa is described below

commit 4da66fa90988102d23c6702cc5a9b9d01346bbef
Author: Leonard Lausen <lau...@amazon.com>
AuthorDate: Mon Feb 17 09:24:28 2020 -0800

    Backport #17596 (#17603)
    
    * Fix transformer.cu interleaved matmul for cuda arch < 5  (#17596)
    
    cublasGemmStridedBatchedEx is only supported for GPUs with architecture capabilities equal to or greater than 5.0.
    
    Fixes a bug in #16408
    
    * pin Markdown version to 3.1 in Julia doc build (#17549)
    
    * pin Sphinx due to autodocsumm issue with v4.2.0 (#17561)
    
    * pin python dependencies (#17556)
    
    * [CI] Fix static build pipeline (#17474)
    
    * 1.5.x CI fixes (#17426)
    
    * Fix numpy decorator
    
    * Workaround https://github.com/pytest-dev/pytest/issues/5903
    
    * Disable pylint warnings
    
    * Fix Edge build
    
    * Fix numpy decorator on Centos
    
    * Follow redirects when downloading apache-maven-3.3.9-bin.tar.gz
    
    Co-authored-by: Hao Jin <hjjn.a...@gmail.com>
    Co-authored-by: Aaron Markham <markh...@amazon.com>
---
 ci/docker/install/centos7_python.sh        |  4 +-
 ci/docker/install/centos7_scala.sh         |  4 +-
 ci/docker/install/requirements             |  1 +
 ci/docker/install/ubuntu_onnx.sh           |  4 +-
 ci/docker/install/ubuntu_publish.sh        | 19 ++++----
 ci/docker/install/ubuntu_scala.sh          |  4 +-
 docs/python_docs/environment.yml           |  2 +-
 julia/docs/Makefile                        | 13 +++++-
 src/operator/contrib/transformer.cu        | 71 +++++++++++++++++++++++++-----
 tools/caffe_converter/caffe_proto_utils.py |  2 +-
 tools/caffe_converter/convert_mean.py      |  2 +-
 11 files changed, 91 insertions(+), 35 deletions(-)

diff --git a/ci/docker/install/centos7_python.sh b/ci/docker/install/centos7_python.sh
index 8521cde..686cf14 100755
--- a/ci/docker/install/centos7_python.sh
+++ b/ci/docker/install/centos7_python.sh
@@ -31,5 +31,5 @@ curl "https://bootstrap.pypa.io/get-pip.py" -o "get-pip.py"
 python2.7 get-pip.py
 python3.6 get-pip.py
 
-pip2 install nose pylint numpy nose-timer requests h5py scipy==1.0.1
-pip3 install nose pylint numpy nose-timer requests h5py scipy==1.0.1
+pip2 install nose pylint numpy nose-timer requests h5py scipy==1.2.1 decorator==4.4.0
+pip3 install nose pylint numpy nose-timer requests h5py scipy==1.2.1 decorator==4.4.0
diff --git a/ci/docker/install/centos7_scala.sh b/ci/docker/install/centos7_scala.sh
index 5a1c416..df0d7a1 100755
--- a/ci/docker/install/centos7_scala.sh
+++ b/ci/docker/install/centos7_scala.sh
@@ -27,8 +27,8 @@ export JAVA_HOME=/usr/lib/jvm/jre-1.8.0-openjdk
 export PATH=$JAVA_HOME/bin:$PATH
 
 # Build from source with Maven
-curl -o apache-maven-3.3.9-bin.tar.gz http://www.eu.apache.org/dist/maven/maven-3/3.3.9/binaries/apache-maven-3.3.9-bin.tar.gz \
-    || curl -o apache-maven-3.3.9-bin.tar.gz https://search.maven.org/remotecontent?filepath=org/apache/maven/apache-maven/3.3.9/apache-maven-3.3.9-bin.tar.gz
+curl -o apache-maven-3.3.9-bin.tar.gz -L http://www.eu.apache.org/dist/maven/maven-3/3.3.9/binaries/apache-maven-3.3.9-bin.tar.gz \
+    || curl -o apache-maven-3.3.9-bin.tar.gz -L https://search.maven.org/remotecontent?filepath=org/apache/maven/apache-maven/3.3.9/apache-maven-3.3.9-bin.tar.gz
 
 tar xzf apache-maven-3.3.9-bin.tar.gz
 mkdir /usr/local/maven
diff --git a/ci/docker/install/requirements b/ci/docker/install/requirements
index fd716f5..61c9ef8 100644
--- a/ci/docker/install/requirements
+++ b/ci/docker/install/requirements
@@ -21,6 +21,7 @@
 boto3==1.9.229
 cpplint==1.3.0
 Cython==0.29.7
+CommonMark==0.5.4
 decorator==4.4.0
 h5py==2.8.0rc1
 mock==2.0.0
diff --git a/ci/docker/install/ubuntu_onnx.sh b/ci/docker/install/ubuntu_onnx.sh
index 3070289..1a220c7 100755
--- a/ci/docker/install/ubuntu_onnx.sh
+++ b/ci/docker/install/ubuntu_onnx.sh
@@ -31,5 +31,5 @@ apt-get update || true
 apt-get install -y libprotobuf-dev protobuf-compiler
 
 echo "Installing pytest, pytest-cov, protobuf, Pillow, ONNX and tabulate ..."
-pip2 install pytest==3.6.3 pytest-cov==2.5.1 protobuf==3.5.2 onnx==1.3.0 Pillow==5.0.0 tabulate==0.7.5
-pip3 install pytest==3.6.3 pytest-cov==2.5.1 protobuf==3.5.2 onnx==1.3.0 Pillow==5.0.0 tabulate==0.7.5
+pip2 install pytest==3.6.3 pytest-cov==2.5.1 protobuf==3.5.2 onnx==1.3.0 Pillow==5.0.0 tabulate==0.7.5 attrs==19.1.0
+pip3 install pytest==3.6.3 pytest-cov==2.5.1 protobuf==3.5.2 onnx==1.3.0 Pillow==5.0.0 tabulate==0.7.5 attrs==19.1.0
diff --git a/ci/docker/install/ubuntu_publish.sh b/ci/docker/install/ubuntu_publish.sh
index 65982ee..c3517e2 100755
--- a/ci/docker/install/ubuntu_publish.sh
+++ b/ci/docker/install/ubuntu_publish.sh
@@ -48,8 +48,8 @@ apt-get install -y git \
     pkg-config \
     openjdk-8-jdk
 
-curl -o apache-maven-3.3.9-bin.tar.gz http://www.eu.apache.org/dist/maven/maven-3/3.3.9/binaries/apache-maven-3.3.9-bin.tar.gz \
-    || curl -o apache-maven-3.3.9-bin.tar.gz https://search.maven.org/remotecontent?filepath=org/apache/maven/apache-maven/3.3.9/apache-maven-3.3.9-bin.tar.gz
+curl -o apache-maven-3.3.9-bin.tar.gz -L http://www.eu.apache.org/dist/maven/maven-3/3.3.9/binaries/apache-maven-3.3.9-bin.tar.gz \
+    || curl -o apache-maven-3.3.9-bin.tar.gz -L https://search.maven.org/remotecontent?filepath=org/apache/maven/apache-maven/3.3.9/apache-maven-3.3.9-bin.tar.gz
 
 tar xzf apache-maven-3.3.9-bin.tar.gz
 mkdir /usr/local/maven
@@ -57,14 +57,13 @@ mv apache-maven-3.3.9/ /usr/local/maven/
 update-alternatives --install /usr/bin/mvn mvn /usr/local/maven/apache-maven-3.3.9/bin/mvn 1
 update-ca-certificates -f
 
-apt-get install -y python python3
+apt-get install -y python python-pip python3 python3-pip
 
 # the version of the pip shipped with ubuntu may be too lower, install a recent version here
-wget -nv https://bootstrap.pypa.io/get-pip.py
-python3 get-pip.py
-python2 get-pip.py
+# Restrict pip version to <19 due to use of Python 3.4 on Ubuntu 14.04
+python2 -m pip install --upgrade 'pip<19'
+python3 -m pip install --upgrade 'pip<19'
 
-apt-get remove -y python3-urllib3
-
-pip2 install nose cpplint==1.3.0 'numpy>1.16.0,<2.0.0' nose-timer 'requests<2.19.0,>=2.18.4' h5py==2.8.0rc1 scipy==1.0.1 boto3
-pip3 install nose cpplint==1.3.0 pylint==2.3.1 'numpy>1.16.0,<2.0.0' nose-timer 'requests<2.19.0,>=2.18.4' h5py==2.8.0rc1 scipy==1.0.1 boto3
+# Restrict numpy version to <1.18 due to use of Python 3.4 on Ubuntu 14.04
+python2 -m pip install --upgrade --ignore-installed nose cpplint==1.3.0 'numpy>1.16.0,<1.17' nose-timer 'requests<2.19.0,>=2.18.4' h5py==2.8.0rc1 scipy==1.0.1 boto3
+python3 -m pip install --upgrade --ignore-installed nose cpplint==1.3.0 pylint==2.3.1 'numpy>1.16.0,<1.18' nose-timer 'requests<2.19.0,>=2.18.4' h5py==2.8.0rc1 scipy==1.0.1 boto3
diff --git a/ci/docker/install/ubuntu_scala.sh b/ci/docker/install/ubuntu_scala.sh
index 9115bbc..d223b8e 100755
--- a/ci/docker/install/ubuntu_scala.sh
+++ b/ci/docker/install/ubuntu_scala.sh
@@ -40,8 +40,8 @@ apt-get install -y \
 
 # Ubuntu 14.04
 if [[ $(lsb_release -r | grep 14.04) ]]; then
-    curl -o apache-maven-3.3.9-bin.tar.gz http://www.eu.apache.org/dist/maven/maven-3/3.3.9/binaries/apache-maven-3.3.9-bin.tar.gz \
-        || curl -o apache-maven-3.3.9-bin.tar.gz https://search.maven.org/remotecontent?filepath=org/apache/maven/apache-maven/3.3.9/apache-maven-3.3.9-bin.tar.gz
+    curl -o apache-maven-3.3.9-bin.tar.gz -L http://www.eu.apache.org/dist/maven/maven-3/3.3.9/binaries/apache-maven-3.3.9-bin.tar.gz \
+        || curl -o apache-maven-3.3.9-bin.tar.gz -L https://search.maven.org/remotecontent?filepath=org/apache/maven/apache-maven/3.3.9/apache-maven-3.3.9-bin.tar.gz
 
     tar xzf apache-maven-3.3.9-bin.tar.gz
     mkdir /usr/local/maven
diff --git a/docs/python_docs/environment.yml b/docs/python_docs/environment.yml
index 5f66d77..7d9d42d 100644
--- a/docs/python_docs/environment.yml
+++ b/docs/python_docs/environment.yml
@@ -23,7 +23,7 @@ dependencies:
 - pip
 - python
 - jupyter
-- sphinx
+- sphinx==2.2.2
 - matplotlib
 - notebook
 - pip:
diff --git a/julia/docs/Makefile b/julia/docs/Makefile
index e42b8cd..66f36df 100644
--- a/julia/docs/Makefile
+++ b/julia/docs/Makefile
@@ -20,8 +20,17 @@ all:
          'using Pkg; \
           Pkg.develop(PackageSpec(name="MXNet", path = joinpath(pwd(), "..")))'
        julia --color=yes --project=./ ./make.jl
-       pip install --user pygments mkdocs mkdocs-material python-markdown-math
-       ~/.local/bin/mkdocs build
+       pip install --user Markdown==3.1 \
+    mkdocs==1.0.4 \
+               mkdocs-material==4.6.0 \
+         pygments==2.5.2 \
+         pymdown-extensions==6.2.1 \
+         python-markdown-math==0.6
+       export LC_ALL="C.UTF-8"
+       export LANG="C.UTF-8"
+       echo $(LC_ALL)
+       echo $(LANG)
+       LC_ALL="C.UTF-8" ~/.local/bin/mkdocs build
 
 clean:
        rm -rvf venv
diff --git a/src/operator/contrib/transformer.cu b/src/operator/contrib/transformer.cu
index e152669..59029ea 100644
--- a/src/operator/contrib/transformer.cu
+++ b/src/operator/contrib/transformer.cu
@@ -50,18 +50,65 @@ void CublasStridedBatchedGemm(mshadow::Stream<gpu>* s, bool transA, bool transB,
       << "Must init CuBLAS handle in stream";
 
   cublasHandle_t blas_handle = mshadow::Stream<gpu>::GetBlasHandle(s);
-  auto err = CUBLAS_STATUS_SUCCESS;
-  // TODO(cfujitsang): handle computation_precision
-  err = cublasGemmStridedBatchedEx(
-      blas_handle, CublasTransposeOp(transA), CublasTransposeOp(transB),
-      static_cast<int>(m), static_cast<int>(n), static_cast<int>(k),
-      reinterpret_cast<void*>(&alpha),
-      a, CublasType<DType>::kCudaFlag, static_cast<int>(lda), strideA,
-      b, CublasType<DType>::kCudaFlag, static_cast<int>(ldb), strideB,
-      reinterpret_cast<void*>(&beta),
-      c, CublasType<DType>::kCudaFlag, static_cast<int>(ldc), strideC,
-      static_cast<int>(batchCount), CUDA_R_32F, algo);
-  CHECK_EQ(err, CUBLAS_STATUS_SUCCESS) << "Cublas gemmEx fail.";
+  // cublasGemmStridedBatchedEx is only supported for GPU with architecture
+  // capabilities equal or greater than 5.0. Fall back to
+  // cublasSgemmStridedBatched, which doesn't support implicit conversion
+  // to half-precision to use TensorCores
+  auto cc_major = (s->prop).major;
+  if (cc_major >= 5) {
+    CUBLAS_CALL(cublasGemmStridedBatchedEx(
+        blas_handle, CublasTransposeOp(transA), CublasTransposeOp(transB),
+        static_cast<int>(m), static_cast<int>(n), static_cast<int>(k),
+        reinterpret_cast<void*>(&alpha),
+        a, CublasType<DType>::kCudaFlag, static_cast<int>(lda), strideA,
+        b, CublasType<DType>::kCudaFlag, static_cast<int>(ldb), strideB,
+        reinterpret_cast<void*>(&beta),
+        c, CublasType<DType>::kCudaFlag, static_cast<int>(ldc), strideC,
+        static_cast<int>(batchCount), CUDA_R_32F, algo));
+  } else {
+    if (std::is_same<DType, float>::value) {
+      CUBLAS_CALL(cublasSgemmStridedBatched(
+          blas_handle, CublasTransposeOp(transA), CublasTransposeOp(transB),
+          static_cast<int>(m), static_cast<int>(n), static_cast<int>(k),
+          reinterpret_cast<float*>(&alpha),
+          reinterpret_cast<const float*>(a),
+          static_cast<int>(lda), strideA,
+          reinterpret_cast<const float*>(b),
+          static_cast<int>(ldb), strideB,
+          reinterpret_cast<float*>(&beta),
+          reinterpret_cast<float*>(c),
+          static_cast<int>(ldc), strideC,
+          static_cast<int>(batchCount)));
+    } else if (std::is_same<DType, double>::value) {
+      CUBLAS_CALL(cublasDgemmStridedBatched(
+          blas_handle, CublasTransposeOp(transA), CublasTransposeOp(transB),
+          static_cast<int>(m), static_cast<int>(n), static_cast<int>(k),
+          reinterpret_cast<double*>(&alpha),
+          reinterpret_cast<const double*>(a),
+          static_cast<int>(lda), strideA,
+          reinterpret_cast<const double*>(b),
+          static_cast<int>(ldb), strideB,
+          reinterpret_cast<double*>(&beta),
+          reinterpret_cast<double*>(c),
+          static_cast<int>(ldc), strideC,
+          static_cast<int>(batchCount)));
+    } else if (std::is_same<DType, mshadow::half::half_t>::value) {
+      CUBLAS_CALL(cublasHgemmStridedBatched(
+          blas_handle, CublasTransposeOp(transA), CublasTransposeOp(transB),
+          static_cast<int>(m), static_cast<int>(n), static_cast<int>(k),
+          reinterpret_cast<__half*>(&alpha),
+          reinterpret_cast<const __half*>(a),
+          static_cast<int>(lda), strideA,
+          reinterpret_cast<const __half*>(b),
+          static_cast<int>(ldb), strideB,
+          reinterpret_cast<__half*>(&beta),
+          reinterpret_cast<__half*>(c),
+          static_cast<int>(ldc), strideC,
+          static_cast<int>(batchCount)));
+    } else {
+      LOG(FATAL) << "Unsupported DType in CublasStridedBatchedGemm.";
+    }
+  }
 #else
   LOG(FATAL) << "Not implemented with CUDA < 9.1";
 #endif
diff --git a/tools/caffe_converter/caffe_proto_utils.py b/tools/caffe_converter/caffe_proto_utils.py
index 8d61834..54cd952 100644
--- a/tools/caffe_converter/caffe_proto_utils.py
+++ b/tools/caffe_converter/caffe_proto_utils.py
@@ -196,7 +196,7 @@ def read_caffe_mean(caffe_mean_file):
         mean_blob.ParseFromString(f.read())
 
     img_mean_np = np.array(mean_blob.data)
-    img_mean_np = img_mean_np.reshape(mean_blob.channels, mean_blob.height, mean_blob.width)
+    img_mean_np = img_mean_np.reshape(mean_blob.channels, mean_blob.height, mean_blob.width)  # pylint: disable=too-many-function-args
 
     # swap channels from Caffe BGR to RGB
     img_mean_np[[0, 2], :, :] = img_mean_np[[2, 0], :, :]
diff --git a/tools/caffe_converter/convert_mean.py b/tools/caffe_converter/convert_mean.py
index 1a3df71..1debfe5 100644
--- a/tools/caffe_converter/convert_mean.py
+++ b/tools/caffe_converter/convert_mean.py
@@ -42,7 +42,7 @@ def convert_mean(binaryproto_fname, output=None):
         mean_blob.ParseFromString(f.read())
 
     img_mean_np = np.array(mean_blob.data)
-    img_mean_np = img_mean_np.reshape(
+    img_mean_np = img_mean_np.reshape(  # pylint: disable=too-many-function-args
         mean_blob.channels, mean_blob.height, mean_blob.width
     )
     # swap channels from Caffe BGR to RGB

Reply via email to