commit:     c4c3a9517f1f59dd2ac5bf656a37bff862457c7b
Author:     Alfredo Tupone <tupone <AT> gentoo <DOT> org>
AuthorDate: Mon Jul 28 05:50:54 2025 +0000
Commit:     Alfredo Tupone <tupone <AT> gentoo <DOT> org>
CommitDate: Mon Jul 28 05:50:54 2025 +0000
URL:        https://gitweb.gentoo.org/repo/gentoo.git/commit/?id=c4c3a951

dev-libs/cutlass: add 3.9.2

Signed-off-by: Alfredo Tupone <tupone <AT> gentoo.org>

 dev-libs/cutlass/Manifest             |   1 +
 dev-libs/cutlass/cutlass-3.9.2.ebuild | 146 ++++++++++++++++++++++++++++++++++
 2 files changed, 147 insertions(+)

diff --git a/dev-libs/cutlass/Manifest b/dev-libs/cutlass/Manifest
index 7d37a1b3f103..25d66b3cd8e8 100644
--- a/dev-libs/cutlass/Manifest
+++ b/dev-libs/cutlass/Manifest
@@ -2,3 +2,4 @@ DIST cutlass-3.4.1.tar.gz 23351905 BLAKE2B 6524d70a4f65fc28c7150aa44bfbb5cc64c1b
 DIST cutlass-3.5.1.tar.gz 29257911 BLAKE2B 680a67beabf821873655ea8145a84a0a6084b5d9dbd03bdc1b49a7964bcd4e7e5e3b213fea46d09583715e8862795455cbc47e5dac4e5bba546b7aeaf31881ce SHA512 f9bc493a80e959b21d3adbe85987d375c052f6095be9e13b871f890a6ead093bfb68712eae206fd8fc3f0a2ac06d96760ffec7939869b0e12c4c37788184cc21
 DIST cutlass-3.8.0.tar.gz 31021072 BLAKE2B 4dd85f7c0d3452c2a194902fcd0afd7de3a3f17f86f477628d5e5f416ac885a86ed1fbbf2a9959a46e60e38a93400a7ec99bad1f980b0a4be36fad0de887ec0b SHA512 a08aac281fb3bdea82c0a044dc643c40e4803d02e55bbea450021cb7a5472aed86e79c5df41cd981976af8403f18cc48d8069045c4e68339430d3a3caeb109ac
 DIST cutlass-3.9.1.tar.gz 31532887 BLAKE2B bdfddc707ee5c1303bebcefa89cc956271f140d6c2b1e928d2e4510def272cedcbc37b884a62ca371f967638f335d5e9fc63adecc862bf75861128cdffd57053 SHA512 1507b9b0a7caf055c7d6e3d4c6a4012ff46daf7a9ccc0ac6dda6ba85ae226f692489854ad74c909b946c9a111c280d0d05efc548dbd5d9497b928caaf46f3611
+DIST cutlass-3.9.2.tar.gz 31534258 BLAKE2B 04462b3c6983f96b2027821408c4de30bf6b2e18e986ddebaf4f9d5572df354273116603ccc0ac618c61e03b981972e6d7786f354aa4f5e08d185cf7e4ad8e1d SHA512 d45a9e4908b5886259acc1ffd4c8e4c6072801ad45909f365d599510b9989d3313438f2fa5cbee5c1e916e496a0b95bda85f79de3c38502d73e2b9206f868822

diff --git a/dev-libs/cutlass/cutlass-3.9.2.ebuild b/dev-libs/cutlass/cutlass-3.9.2.ebuild
new file mode 100644
index 000000000000..25b85cb0784b
--- /dev/null
+++ b/dev-libs/cutlass/cutlass-3.9.2.ebuild
@@ -0,0 +1,146 @@
+# Copyright 2023-2025 Gentoo Authors
+# Distributed under the terms of the GNU General Public License v2
+
+EAPI=8
+
+PYTHON_COMPAT=( python3_{11..13} )
+
+inherit cuda cmake python-any-r1 flag-o-matic toolchain-funcs
+
+DESCRIPTION="CUDA Templates for Linear Algebra Subroutines"
+HOMEPAGE="https://github.com/NVIDIA/cutlass"
+SRC_URI="https://github.com/NVIDIA/${PN}/archive/refs/tags/v${PV}.tar.gz
+       -> ${P}.tar.gz"
+
+LICENSE="BSD"
+SLOT="0"
+KEYWORDS="~amd64"
+
+X86_CPU_FEATURES=(
+       f16c:f16c # <flag>:<suffix>; only the part before the colon ends up in IUSE
+)
+CPU_FEATURES=( "${X86_CPU_FEATURES[@]/#/cpu_flags_x86_}" ) # prepend cpu_flags_x86_ to every entry
+
+IUSE="clang-cuda cublas cudnn doc dot examples +headers-only jumbo-build performance profiler test tools ${CPU_FEATURES[*]%:*}"
+
+REQUIRED_USE="
+       headers-only? (
+               !examples !test
+       )
+       test? ( tools )
+"
+
+RESTRICT="!test? ( test )" # the test phase is restricted unless USE=test is set
+
+RDEPEND="
+       dev-util/nvidia-cuda-toolkit:=
+"
+DEPEND="${RDEPEND}
+       test? (
+               ${PYTHON_DEPS}
+               cudnn? (
+                       dev-libs/cudnn:=
+               )
+       )
+       tools? (
+               ${PYTHON_DEPS}
+       )
+"
+
+pkg_setup() {
+       if use test || use tools; then
+               python-any-r1_pkg_setup
+       fi
+}
+
+src_prepare() {
+       cmake_src_prepare
+
+       sed \
+               -e '/-std=/s/17/20/g' \
+               -i \
+                       CMakeLists.txt \
+                       python/cutlass/backend/compiler.py \
+                       python/cutlass/emit/pytorch.py \
+                       python/docs/_modules/cutlass/emit/pytorch.html \
+                       test/unit/nvrtc/thread/nvrtc_contraction.cu \
+                       test/unit/nvrtc/thread/testbed.h \
+                       media/docs/cpp/ide_setup.md \
+               || die
+
+}
+
+src_configure() {
+       # Select the host compiler, prepare the sandbox and assemble the CMake arguments.
+       # we can use clang as default
+       if use clang-cuda && ! tc-is-clang ; then
+               export CC="${CHOST}-clang"
+               export CXX="${CHOST}-clang++"
+       else
+               tc-export CXX CC
+       fi
+
+       # clang-cuda needs to filter mfpmath; it also points CUDACXX at clang++
+       if use clang-cuda ; then
+               filter-mfpmath sse
+               filter-mfpmath i386
+               export CUDACXX=clang++
+       fi
+
+       # NOTE(review): presumably whitelists the GPU device nodes for the sandbox; confirm in cuda.eclass
+       cuda_add_sandbox
+       addpredict "/dev/char/"
+
+       local mycmakeargs=(
+               -DCMAKE_POLICY_DEFAULT_CMP0156="OLD" # cutlass_add_library
+
+               # -DCMAKE_CUDA_COMPILER="$(cuda_get_host_compiler)" # nvcc/clang++
+               -DCMAKE_CUDA_FLAGS="$(cuda_gccdir -f)"
+
+               -DCMAKE_DISABLE_FIND_PACKAGE_Doxygen="$(usex !doc)"
+
+               -DCUTLASS_REVISION="${PVR}"
+               -DCUTLASS_ENABLE_CUBLAS="$(usex cublas)"
+               -DCUTLASS_ENABLE_CUDNN="$(usex cudnn)"
+               -DCUTLASS_ENABLE_EXAMPLES="$(usex examples)"
+               -DCUTLASS_ENABLE_F16C="$(usex cpu_flags_x86_f16c)"
+               -DCUTLASS_ENABLE_GTEST_UNIT_TESTS="$(usex test)"
+               -DCUTLASS_ENABLE_HEADERS_ONLY="$(usex headers-only)"
+               -DCUTLASS_ENABLE_LIBRARY="$(usex !headers-only)"
+               -DCUTLASS_ENABLE_PERFORMANCE="$(usex performance)"
+               -DCUTLASS_ENABLE_PROFILER="$(usex profiler)"
+               -DCUTLASS_ENABLE_PROFILER_UNIT_TESTS="$(usex test "$(usex profiler)")"
+               -DCUTLASS_ENABLE_TESTS="$(usex test)"
+               -DCUTLASS_ENABLE_TOOLS="$(usex tools)"
+               -DCUTLASS_INSTALL_TESTS="no"
+               -DCUTLASS_NVCC_ARCHS="${CUDAARCHS}"
+               -DCUTLASS_UNITY_BUILD_ENABLED="$(usex jumbo-build)"
+               -DCUTLASS_USE_SYSTEM_GOOGLETEST="yes"
+               -DIMPLICIT_CMAKE_CXX_STANDARD="yes"
+       )
+
+       if use doc; then
+               mycmakeargs+=(
+                       # Use dot to generate graphs in the doxygen documentation.
+                       -DCUTLASS_ENABLE_DOXYGEN_DOT="$(usex dot)"
+               )
+       fi
+
+       if use test; then
+               mycmakeargs+=(
+                       -DCUTLASS_TEST_LEVEL="2"
+               )
+       fi
+
+       cmake_src_configure
+}
+
+src_test() { # Test phase: adjust the sandbox for CUDA, then hand over to cmake_src_test.
+       cuda_add_sandbox -w # NOTE(review): -w presumably requests write access to the GPU device nodes; confirm in cuda.eclass
+       cmake_src_test
+}
+
+src_install() { # Install phase: run cmake_src_install, then prune usr/test from the image.
+       cmake_src_install
+       rm -r "${ED}"/usr/test || die # aborts the phase if usr/test is missing from the image
+}

Reply via email to