This is an automated email from the ASF dual-hosted git repository. lausen pushed a commit to branch v1.6.x in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
The following commit(s) were added to refs/heads/v1.6.x by this push: new 4da66fa Backport #17596 (#17603) 4da66fa is described below commit 4da66fa90988102d23c6702cc5a9b9d01346bbef Author: Leonard Lausen <lau...@amazon.com> AuthorDate: Mon Feb 17 09:24:28 2020 -0800 Backport #17596 (#17603) * Fix transformer.cu interleaved matmul for cuda arch < 5 (#17596) cublasGemmBatchedEx is only supported for GPU with architecture capabilities equal or greater than 5.0. Fixes a bug in #16408 * pin Markdown version to 3.1 in Julia doc build (#17549) * pin Sphinx due to autodocsumm issue with v4.2.0 (#17561) * pin python dependencies (#17556) * [CI] Fix static build pipeline (#17474) * 1.5.x CI fixes (#17426) * Fix numpy decorator * Workaround https://github.com/pytest-dev/pytest/issues/5903 * Disable pylint warnings * Fix Edge build * Fix numpy decorator on Centos * Follow redirects when downloading apache-maven-3.3.9-bin.tar.gz Co-authored-by: Hao Jin <hjjn.a...@gmail.com> Co-authored-by: Aaron Markham <markh...@amazon.com> --- ci/docker/install/centos7_python.sh | 4 +- ci/docker/install/centos7_scala.sh | 4 +- ci/docker/install/requirements | 1 + ci/docker/install/ubuntu_onnx.sh | 4 +- ci/docker/install/ubuntu_publish.sh | 19 ++++---- ci/docker/install/ubuntu_scala.sh | 4 +- docs/python_docs/environment.yml | 2 +- julia/docs/Makefile | 13 +++++- src/operator/contrib/transformer.cu | 71 +++++++++++++++++++++++++----- tools/caffe_converter/caffe_proto_utils.py | 2 +- tools/caffe_converter/convert_mean.py | 2 +- 11 files changed, 91 insertions(+), 35 deletions(-) diff --git a/ci/docker/install/centos7_python.sh b/ci/docker/install/centos7_python.sh index 8521cde..686cf14 100755 --- a/ci/docker/install/centos7_python.sh +++ b/ci/docker/install/centos7_python.sh @@ -31,5 +31,5 @@ curl "https://bootstrap.pypa.io/get-pip.py" -o "get-pip.py" python2.7 get-pip.py python3.6 get-pip.py -pip2 install nose pylint numpy nose-timer requests h5py scipy==1.0.1 -pip3 install nose pylint numpy nose-timer requests h5py scipy==1.0.1 +pip2 install nose pylint numpy nose-timer requests h5py scipy==1.2.1 decorator==4.4.0 +pip3 install nose pylint numpy nose-timer requests h5py scipy==1.2.1 decorator==4.4.0 diff --git a/ci/docker/install/centos7_scala.sh b/ci/docker/install/centos7_scala.sh index 5a1c416..df0d7a1 100755 --- a/ci/docker/install/centos7_scala.sh +++ b/ci/docker/install/centos7_scala.sh @@ -27,8 +27,8 @@ export JAVA_HOME=/usr/lib/jvm/jre-1.8.0-openjdk export PATH=$JAVA_HOME/bin:$PATH # Build from source with Maven -curl -o apache-maven-3.3.9-bin.tar.gz http://www.eu.apache.org/dist/maven/maven-3/3.3.9/binaries/apache-maven-3.3.9-bin.tar.gz \ - || curl -o apache-maven-3.3.9-bin.tar.gz https://search.maven.org/remotecontent?filepath=org/apache/maven/apache-maven/3.3.9/apache-maven-3.3.9-bin.tar.gz +curl -o apache-maven-3.3.9-bin.tar.gz -L http://www.eu.apache.org/dist/maven/maven-3/3.3.9/binaries/apache-maven-3.3.9-bin.tar.gz \ + || curl -o apache-maven-3.3.9-bin.tar.gz -L https://search.maven.org/remotecontent?filepath=org/apache/maven/apache-maven/3.3.9/apache-maven-3.3.9-bin.tar.gz tar xzf apache-maven-3.3.9-bin.tar.gz mkdir /usr/local/maven diff --git a/ci/docker/install/requirements b/ci/docker/install/requirements index fd716f5..61c9ef8 100644 --- a/ci/docker/install/requirements +++ b/ci/docker/install/requirements @@ -21,6 +21,7 @@ boto3==1.9.229 cpplint==1.3.0 Cython==0.29.7 +CommonMark==0.5.4 decorator==4.4.0 h5py==2.8.0rc1 mock==2.0.0 diff --git a/ci/docker/install/ubuntu_onnx.sh b/ci/docker/install/ubuntu_onnx.sh index 3070289..1a220c7 100755 --- a/ci/docker/install/ubuntu_onnx.sh +++ b/ci/docker/install/ubuntu_onnx.sh @@ -31,5 +31,5 @@ apt-get update || true apt-get install -y libprotobuf-dev protobuf-compiler echo "Installing pytest, pytest-cov, protobuf, Pillow, ONNX and tabulate ..." -pip2 install pytest==3.6.3 pytest-cov==2.5.1 protobuf==3.5.2 onnx==1.3.0 Pillow==5.0.0 tabulate==0.7.5 -pip3 install pytest==3.6.3 pytest-cov==2.5.1 protobuf==3.5.2 onnx==1.3.0 Pillow==5.0.0 tabulate==0.7.5 +pip2 install pytest==3.6.3 pytest-cov==2.5.1 protobuf==3.5.2 onnx==1.3.0 Pillow==5.0.0 tabulate==0.7.5 attrs==19.1.0 +pip3 install pytest==3.6.3 pytest-cov==2.5.1 protobuf==3.5.2 onnx==1.3.0 Pillow==5.0.0 tabulate==0.7.5 attrs==19.1.0 diff --git a/ci/docker/install/ubuntu_publish.sh b/ci/docker/install/ubuntu_publish.sh index 65982ee..c3517e2 100755 --- a/ci/docker/install/ubuntu_publish.sh +++ b/ci/docker/install/ubuntu_publish.sh @@ -48,8 +48,8 @@ apt-get install -y git \ pkg-config \ openjdk-8-jdk -curl -o apache-maven-3.3.9-bin.tar.gz http://www.eu.apache.org/dist/maven/maven-3/3.3.9/binaries/apache-maven-3.3.9-bin.tar.gz \ - || curl -o apache-maven-3.3.9-bin.tar.gz https://search.maven.org/remotecontent?filepath=org/apache/maven/apache-maven/3.3.9/apache-maven-3.3.9-bin.tar.gz +curl -o apache-maven-3.3.9-bin.tar.gz -L http://www.eu.apache.org/dist/maven/maven-3/3.3.9/binaries/apache-maven-3.3.9-bin.tar.gz \ + || curl -o apache-maven-3.3.9-bin.tar.gz -L https://search.maven.org/remotecontent?filepath=org/apache/maven/apache-maven/3.3.9/apache-maven-3.3.9-bin.tar.gz tar xzf apache-maven-3.3.9-bin.tar.gz mkdir /usr/local/maven @@ -57,14 +57,13 @@ mv apache-maven-3.3.9/ /usr/local/maven/ update-alternatives --install /usr/bin/mvn mvn /usr/local/maven/apache-maven-3.3.9/bin/mvn 1 update-ca-certificates -f -apt-get install -y python python3 +apt-get install -y python python-pip python3 python3-pip # the version of the pip shipped with ubuntu may be too lower, install a recent version here -wget -nv https://bootstrap.pypa.io/get-pip.py -python3 get-pip.py -python2 get-pip.py +# Restrict pip version to <19 due to use of Python 3.4 on Ubuntu 14.04 +python2 -m pip install --upgrade 'pip<19' +python3 -m pip install --upgrade 'pip<19' -apt-get remove -y python3-urllib3 - -pip2 install nose cpplint==1.3.0 'numpy>1.16.0,<2.0.0' nose-timer 'requests<2.19.0,>=2.18.4' h5py==2.8.0rc1 scipy==1.0.1 boto3 -pip3 install nose cpplint==1.3.0 pylint==2.3.1 'numpy>1.16.0,<2.0.0' nose-timer 'requests<2.19.0,>=2.18.4' h5py==2.8.0rc1 scipy==1.0.1 boto3 +# Restrict numpy version to <1.18 due to use of Python 3.4 on Ubuntu 14.04 +python2 -m pip install --upgrade --ignore-installed nose cpplint==1.3.0 'numpy>1.16.0,<1.17' nose-timer 'requests<2.19.0,>=2.18.4' h5py==2.8.0rc1 scipy==1.0.1 boto3 +python3 -m pip install --upgrade --ignore-installed nose cpplint==1.3.0 pylint==2.3.1 'numpy>1.16.0,<1.18' nose-timer 'requests<2.19.0,>=2.18.4' h5py==2.8.0rc1 scipy==1.0.1 boto3 diff --git a/ci/docker/install/ubuntu_scala.sh b/ci/docker/install/ubuntu_scala.sh index 9115bbc..d223b8e 100755 --- a/ci/docker/install/ubuntu_scala.sh +++ b/ci/docker/install/ubuntu_scala.sh @@ -40,8 +40,8 @@ apt-get install -y \ # Ubuntu 14.04 if [[ $(lsb_release -r | grep 14.04) ]]; then - curl -o apache-maven-3.3.9-bin.tar.gz http://www.eu.apache.org/dist/maven/maven-3/3.3.9/binaries/apache-maven-3.3.9-bin.tar.gz \ - || curl -o apache-maven-3.3.9-bin.tar.gz https://search.maven.org/remotecontent?filepath=org/apache/maven/apache-maven/3.3.9/apache-maven-3.3.9-bin.tar.gz + curl -o apache-maven-3.3.9-bin.tar.gz -L http://www.eu.apache.org/dist/maven/maven-3/3.3.9/binaries/apache-maven-3.3.9-bin.tar.gz \ + || curl -o apache-maven-3.3.9-bin.tar.gz -L https://search.maven.org/remotecontent?filepath=org/apache/maven/apache-maven/3.3.9/apache-maven-3.3.9-bin.tar.gz tar xzf apache-maven-3.3.9-bin.tar.gz mkdir /usr/local/maven diff --git a/docs/python_docs/environment.yml b/docs/python_docs/environment.yml index 5f66d77..7d9d42d 100644 --- a/docs/python_docs/environment.yml +++ b/docs/python_docs/environment.yml @@ -23,7 +23,7 @@ dependencies: - pip - python - jupyter -- sphinx +- sphinx==2.2.2 - matplotlib - notebook - pip: diff --git a/julia/docs/Makefile b/julia/docs/Makefile index e42b8cd..66f36df 100644 --- a/julia/docs/Makefile +++ b/julia/docs/Makefile @@ -20,8 +20,17 @@ all: 'using Pkg; \ Pkg.develop(PackageSpec(name="MXNet", path = joinpath(pwd(), "..")))' julia --color=yes --project=./ ./make.jl - pip install --user pygments mkdocs mkdocs-material python-markdown-math - ~/.local/bin/mkdocs build + pip install --user Markdown==3.1 \ + mkdocs==1.0.4 \ + mkdocs-material==4.6.0 \ + pygments==2.5.2 \ + pymdown-extensions==6.2.1 \ + python-markdown-math==0.6 + export LC_ALL="C.UTF-8" + export LANG="C.UTF-8" + echo $(LC_ALL) + echo $(LANG) + LC_ALL="C.UTF-8" ~/.local/bin/mkdocs build clean: rm -rvf venv diff --git a/src/operator/contrib/transformer.cu b/src/operator/contrib/transformer.cu index e152669..59029ea 100644 --- a/src/operator/contrib/transformer.cu +++ b/src/operator/contrib/transformer.cu @@ -50,18 +50,65 @@ void CublasStridedBatchedGemm(mshadow::Stream<gpu>* s, bool transA, bool transB, << "Must init CuBLAS handle in stream"; cublasHandle_t blas_handle = mshadow::Stream<gpu>::GetBlasHandle(s); - auto err = CUBLAS_STATUS_SUCCESS; - // TODO(cfujitsang): handle computation_precision - err = cublasGemmStridedBatchedEx( - blas_handle, CublasTransposeOp(transA), CublasTransposeOp(transB), - static_cast<int>(m), static_cast<int>(n), static_cast<int>(k), - reinterpret_cast<void*>(&alpha), - a, CublasType<DType>::kCudaFlag, static_cast<int>(lda), strideA, - b, CublasType<DType>::kCudaFlag, static_cast<int>(ldb), strideB, - reinterpret_cast<void*>(&beta), - c, CublasType<DType>::kCudaFlag, static_cast<int>(ldc), strideC, - static_cast<int>(batchCount), CUDA_R_32F, algo); - CHECK_EQ(err, CUBLAS_STATUS_SUCCESS) << "Cublas gemmEx fail."; + // cublasGemmStridedBatchedEx is only supported for GPU with architecture + // capabilities equal or greater than 5.0. Fall back to + // cublasSgemmStridedBatched, which doesn't support implicit conversion + // to half-precision to use TensorCores + auto cc_major = (s->prop).major; + if (cc_major >= 5) { + CUBLAS_CALL(cublasGemmStridedBatchedEx( + blas_handle, CublasTransposeOp(transA), CublasTransposeOp(transB), + static_cast<int>(m), static_cast<int>(n), static_cast<int>(k), + reinterpret_cast<void*>(&alpha), + a, CublasType<DType>::kCudaFlag, static_cast<int>(lda), strideA, + b, CublasType<DType>::kCudaFlag, static_cast<int>(ldb), strideB, + reinterpret_cast<void*>(&beta), + c, CublasType<DType>::kCudaFlag, static_cast<int>(ldc), strideC, + static_cast<int>(batchCount), CUDA_R_32F, algo)); + } else { + if (std::is_same<DType, float>::value) { + CUBLAS_CALL(cublasSgemmStridedBatched( + blas_handle, CublasTransposeOp(transA), CublasTransposeOp(transB), + static_cast<int>(m), static_cast<int>(n), static_cast<int>(k), + reinterpret_cast<float*>(&alpha), + reinterpret_cast<const float*>(a), + static_cast<int>(lda), strideA, + reinterpret_cast<const float*>(b), + static_cast<int>(ldb), strideB, + reinterpret_cast<float*>(&beta), + reinterpret_cast<float*>(c), + static_cast<int>(ldc), strideC, + static_cast<int>(batchCount))); + } else if (std::is_same<DType, double>::value) { + CUBLAS_CALL(cublasDgemmStridedBatched( + blas_handle, CublasTransposeOp(transA), CublasTransposeOp(transB), + static_cast<int>(m), static_cast<int>(n), static_cast<int>(k), + reinterpret_cast<double*>(&alpha), + reinterpret_cast<const double*>(a), + static_cast<int>(lda), strideA, + reinterpret_cast<const double*>(b), + static_cast<int>(ldb), strideB, + reinterpret_cast<double*>(&beta), + reinterpret_cast<double*>(c), + static_cast<int>(ldc), strideC, + static_cast<int>(batchCount))); + } else if (std::is_same<DType, mshadow::half::half_t>::value) { + CUBLAS_CALL(cublasHgemmStridedBatched( + blas_handle, CublasTransposeOp(transA), CublasTransposeOp(transB), + static_cast<int>(m), static_cast<int>(n), static_cast<int>(k), + reinterpret_cast<__half*>(&alpha), + reinterpret_cast<const __half*>(a), + static_cast<int>(lda), strideA, + reinterpret_cast<const __half*>(b), + static_cast<int>(ldb), strideB, + reinterpret_cast<__half*>(&beta), + reinterpret_cast<__half*>(c), + static_cast<int>(ldc), strideC, + static_cast<int>(batchCount))); + } else { + LOG(FATAL) << "Unsupported DType in CublasStridedBatchedGemm."; + } + } #else LOG(FATAL) << "Not implemented with CUDA < 9.1"; #endif diff --git a/tools/caffe_converter/caffe_proto_utils.py b/tools/caffe_converter/caffe_proto_utils.py index 8d61834..54cd952 100644 --- a/tools/caffe_converter/caffe_proto_utils.py +++ b/tools/caffe_converter/caffe_proto_utils.py @@ -196,7 +196,7 @@ def read_caffe_mean(caffe_mean_file): mean_blob.ParseFromString(f.read()) img_mean_np = np.array(mean_blob.data) - img_mean_np = img_mean_np.reshape(mean_blob.channels, mean_blob.height, mean_blob.width) + img_mean_np = img_mean_np.reshape(mean_blob.channels, mean_blob.height, mean_blob.width) # pylint: disable=too-many-function-args # swap channels from Caffe BGR to RGB img_mean_np[[0, 2], :, :] = img_mean_np[[2, 0], :, :] diff --git a/tools/caffe_converter/convert_mean.py b/tools/caffe_converter/convert_mean.py index 1a3df71..1debfe5 100644 --- a/tools/caffe_converter/convert_mean.py +++ b/tools/caffe_converter/convert_mean.py @@ -42,7 +42,7 @@ def convert_mean(binaryproto_fname, output=None): mean_blob.ParseFromString(f.read()) img_mean_np = np.array(mean_blob.data) - img_mean_np = img_mean_np.reshape( + img_mean_np = img_mean_np.reshape( # pylint: disable=too-many-function-args mean_blob.channels, mean_blob.height, mean_blob.width ) # swap channels from Caffe BGR to RGB