commit:     fad64b065493a4406304cf7cb4d4aa36a7876555
Author:     Miezhiko <Miezhiko <AT> gmail <DOT> com>
AuthorDate: Mon Dec 27 10:56:29 2021 +0000
Commit:     Andrew Ammerlaan <andrewammerlaan <AT> gentoo <DOT> org>
CommitDate: Mon Dec 27 11:01:04 2021 +0000
URL:        https://gitweb.gentoo.org/proj/sci.git/commit/?id=fad64b06

sci-libs/pytorch: fixes for cuda USE flag, nccl update

Signed-off-by: Miezhiko <Miezhiko <AT> gmail.com>
Closes: https://github.com/gentoo/sci/pull/1132
Signed-off-by: Andrew Ammerlaan <andrewammerlaan <AT> gentoo.org>

 .../files/pytorch-1.10.1-nccl-nvccflags.patch      | 37 ++++++++++++++++++++++
 sci-libs/pytorch/pytorch-1.10.1.ebuild             |  7 ++--
 2 files changed, 41 insertions(+), 3 deletions(-)

diff --git a/sci-libs/pytorch/files/pytorch-1.10.1-nccl-nvccflags.patch b/sci-libs/pytorch/files/pytorch-1.10.1-nccl-nvccflags.patch
new file mode 100644
index 000000000..0ac12a1e7
--- /dev/null
+++ b/sci-libs/pytorch/files/pytorch-1.10.1-nccl-nvccflags.patch
@@ -0,0 +1,37 @@
+From 7b04cd503fa1a6265893e0320ee246bcc0bd98cf Mon Sep 17 00:00:00 2001
+From: Miezhiko <miezh...@gmail.com>
+Date: Mon, 27 Dec 2021 14:48:12 +0400
+Subject: [PATCH] neutral flags
+
+---
+ makefiles/common.mk | 10 +---------
+ 1 file changed, 1 insertion(+), 9 deletions(-)
+
+diff --git a/makefiles/common.mk b/makefiles/common.mk
+index 64f8d2dc6..7a979c90f 100644
+--- a/makefiles/common.mk
++++ b/makefiles/common.mk
+@@ -55,7 +55,7 @@ CXXFLAGS   := -DCUDA_MAJOR=$(CUDA_MAJOR) -DCUDA_MINOR=$(CUDA_MINOR) -fPIC -fvisi
+ # Maxrregcount needs to be set accordingly to NCCL_MAX_NTHREADS (otherwise it will cause kernel launch errors)
+ # 512 : 120, 640 : 96, 768 : 80, 1024 : 60
+ # We would not have to set this if we used __launch_bounds__, but this only works on kernels, not on functions.
+-NVCUFLAGS  := -ccbin $(CXX) $(NVCC_GENCODE) -std=c++11 --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all
++NVCUFLAGS  := $(NVCCFLAGS) $(NVCC_GENCODE) -std=c++11 --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all
+ # Use addprefix so that we can specify more than one path
+ NVLDFLAGS  := -L${CUDA_LIB} -lcudart -lrt
+ 
+@@ -69,14 +69,6 @@ NVLDFLAGS   += ${GCOV_FLAGS:%=-Xcompiler %}
+ # $(warning GCOV_FLAGS=${GCOV_FLAGS})
+ ########## GCOV ##########
+ 
+-ifeq ($(DEBUG), 0)
+-NVCUFLAGS += -O3
+-CXXFLAGS  += -O3 -g
+-else
+-NVCUFLAGS += -O0 -G -g
+-CXXFLAGS  += -O0 -g -ggdb3
+-endif
+-
+ ifneq ($(VERBOSE), 0)
+ NVCUFLAGS += -Xptxas -v -Xcompiler -Wall,-Wextra,-Wno-unused-parameter
+ CXXFLAGS  += -Wall -Wextra

diff --git a/sci-libs/pytorch/pytorch-1.10.1.ebuild b/sci-libs/pytorch/pytorch-1.10.1.ebuild
index a7bbb62ac..948d595a7 100644
--- a/sci-libs/pytorch/pytorch-1.10.1.ebuild
+++ b/sci-libs/pytorch/pytorch-1.10.1.ebuild
@@ -28,7 +28,7 @@ https://github.com/facebookincubator/gloo/archive/c22a5cfba94edf8ea4f53a174d38aa
 https://github.com/google/googletest/archive/e2239ee6043f73722e7aa812a459f54a28552929.tar.gz -> googletest-e2239ee6043f73722e7aa812a459f54a28552929.tar.gz
 https://github.com/intel/ideep/archive/9ca27bbfd88fa1469cbf0467bd6f14cd1738fa40.tar.gz -> ideep-9ca27bbfd88fa1469cbf0467bd6f14cd1738fa40.tar.gz
 https://github.com/intel/mkl-dnn/archive/5ef631a0.tar.gz -> mkl-dnn-5ef631a0.tar.gz
-cuda? ( https://github.com/NVIDIA/nccl/archive/033d7995.tar.gz -> nccl-033d7995.tar.gz )
+cuda? ( https://github.com/NVIDIA/nccl/archive/c5790b36722d5b41ee2a9b2bad69e364180ffd22.tar.gz -> nccl-c5790b36722d5b41ee2a9b2bad69e364180ffd22.tar.gz )
 https://github.com/Maratyszcza/NNPACK/archive/c07e3a0400713d546e0dea2d5466dd22ea389c73.tar.gz -> NNPACK-c07e3a0400713d546e0dea2d5466dd22ea389c73.tar.gz
 https://github.com/onnx/onnx/archive/a82c6a70.tar.gz -> onnx-a82c6a70.tar.gz
 https://github.com/onnx/onnx-tensorrt/archive/c153211418a7c57ce071d9ce2a41f8d1c85a878f.tar.gz -> onnx-tensorrt-c153211418a7c57ce071d9ce2a41f8d1c85a878f.tar.gz
@@ -168,7 +168,7 @@ src_prepare() {
        rmdir third_party/ideep/mkl-dnn || die
        ln -sv "${WORKDIR}"/mkl-dnn-5ef631a030a6f73131c77892041042805a06064f third_party/ideep/mkl-dnn || die
        rmdir third_party/nccl/nccl || die
-       ln -sv "${WORKDIR}"/nccl-033d799524fb97629af5ac2f609de367472b2696 third_party/nccl/nccl || die
+       ln -sv "${WORKDIR}"/nccl-c5790b36722d5b41ee2a9b2bad69e364180ffd22 third_party/nccl/nccl || die
        rmdir third_party/NNPACK || die
        ln -sv "${WORKDIR}"/NNPACK-c07e3a0400713d546e0dea2d5466dd22ea389c73 third_party/NNPACK || die
        rmdir third_party/onnx || die
@@ -208,9 +208,10 @@ src_prepare() {
 
        if use cuda; then
                cd third_party/nccl/nccl || die
-               eapply "${FILESDIR}"/${PN}-1.6.0-nccl-nvccflags.patch
+               eapply "${FILESDIR}"/${PN}-1.10.1-nccl-nvccflags.patch
                cuda_src_prepare
                export CUDAHOSTCXX=$(cuda_gccdir)/g++
+               cd "${S}"
        fi
 
        if use rocm; then

Reply via email to