commit: c4c3a9517f1f59dd2ac5bf656a37bff862457c7b
Author: Alfredo Tupone <tupone <AT> gentoo <DOT> org>
AuthorDate: Mon Jul 28 05:50:54 2025 +0000
Commit: Alfredo Tupone <tupone <AT> gentoo <DOT> org>
CommitDate: Mon Jul 28 05:50:54 2025 +0000
URL: https://gitweb.gentoo.org/repo/gentoo.git/commit/?id=c4c3a951
dev-libs/cutlass: add 3.9.2
Signed-off-by: Alfredo Tupone <tupone <AT> gentoo.org>
dev-libs/cutlass/Manifest | 1 +
dev-libs/cutlass/cutlass-3.9.2.ebuild | 146 ++++++++++++++++++++++++++++++++++
2 files changed, 147 insertions(+)
diff --git a/dev-libs/cutlass/Manifest b/dev-libs/cutlass/Manifest
index 7d37a1b3f103..25d66b3cd8e8 100644
--- a/dev-libs/cutlass/Manifest
+++ b/dev-libs/cutlass/Manifest
@@ -2,3 +2,4 @@ DIST cutlass-3.4.1.tar.gz 23351905 BLAKE2B 6524d70a4f65fc28c7150aa44bfbb5cc64c1b
 DIST cutlass-3.5.1.tar.gz 29257911 BLAKE2B 680a67beabf821873655ea8145a84a0a6084b5d9dbd03bdc1b49a7964bcd4e7e5e3b213fea46d09583715e8862795455cbc47e5dac4e5bba546b7aeaf31881ce SHA512 f9bc493a80e959b21d3adbe85987d375c052f6095be9e13b871f890a6ead093bfb68712eae206fd8fc3f0a2ac06d96760ffec7939869b0e12c4c37788184cc21
 DIST cutlass-3.8.0.tar.gz 31021072 BLAKE2B 4dd85f7c0d3452c2a194902fcd0afd7de3a3f17f86f477628d5e5f416ac885a86ed1fbbf2a9959a46e60e38a93400a7ec99bad1f980b0a4be36fad0de887ec0b SHA512 a08aac281fb3bdea82c0a044dc643c40e4803d02e55bbea450021cb7a5472aed86e79c5df41cd981976af8403f18cc48d8069045c4e68339430d3a3caeb109ac
 DIST cutlass-3.9.1.tar.gz 31532887 BLAKE2B bdfddc707ee5c1303bebcefa89cc956271f140d6c2b1e928d2e4510def272cedcbc37b884a62ca371f967638f335d5e9fc63adecc862bf75861128cdffd57053 SHA512 1507b9b0a7caf055c7d6e3d4c6a4012ff46daf7a9ccc0ac6dda6ba85ae226f692489854ad74c909b946c9a111c280d0d05efc548dbd5d9497b928caaf46f3611
+DIST cutlass-3.9.2.tar.gz 31534258 BLAKE2B 04462b3c6983f96b2027821408c4de30bf6b2e18e986ddebaf4f9d5572df354273116603ccc0ac618c61e03b981972e6d7786f354aa4f5e08d185cf7e4ad8e1d SHA512 d45a9e4908b5886259acc1ffd4c8e4c6072801ad45909f365d599510b9989d3313438f2fa5cbee5c1e916e496a0b95bda85f79de3c38502d73e2b9206f868822
diff --git a/dev-libs/cutlass/cutlass-3.9.2.ebuild b/dev-libs/cutlass/cutlass-3.9.2.ebuild
new file mode 100644
index 000000000000..25b85cb0784b
--- /dev/null
+++ b/dev-libs/cutlass/cutlass-3.9.2.ebuild
@@ -0,0 +1,146 @@
+# Copyright 2023-2025 Gentoo Authors
+# Distributed under the terms of the GNU General Public License v2
+
+EAPI=8
+
+PYTHON_COMPAT=( python3_{11..13} )
+
+inherit cuda cmake python-any-r1 flag-o-matic toolchain-funcs
+
+DESCRIPTION="CUDA Templates for Linear Algebra Subroutines"
+HOMEPAGE="https://github.com/NVIDIA/cutlass"
+SRC_URI="https://github.com/NVIDIA/${PN}/archive/refs/tags/v${PV}.tar.gz
+	-> ${P}.tar.gz"
+
+LICENSE="BSD"
+SLOT="0"
+KEYWORDS="~amd64"
+
+# Each entry has the form "<flag>:<name>". CPU_FEATURES prepends
+# cpu_flags_x86_ to every entry, and the IUSE expansion below strips
+# everything from the colon onwards, leaving cpu_flags_x86_<flag>.
+X86_CPU_FEATURES=(
+	f16c:f16c
+)
+CPU_FEATURES=( "${X86_CPU_FEATURES[@]/#/cpu_flags_x86_}" )
+
+IUSE="clang-cuda cublas cudnn doc dot examples +headers-only jumbo-build performance profiler test tools ${CPU_FEATURES[*]%:*}"
+
+# headers-only cannot be combined with examples or test;
+# test additionally requires tools.
+REQUIRED_USE="
+	headers-only? (
+		!examples !test
+	)
+	test? ( tools )
+"
+
+RESTRICT="!test? ( test )"
+
+RDEPEND="
+	dev-util/nvidia-cuda-toolkit:=
+"
+# Python is only pulled in for USE=test or USE=tools, matching the
+# condition checked in pkg_setup.
+DEPEND="${RDEPEND}
+	test? (
+		${PYTHON_DEPS}
+		cudnn? (
+			dev-libs/cudnn:=
+		)
+	)
+	tools? (
+		${PYTHON_DEPS}
+	)
+"
+
+pkg_setup() {
+	# Python setup is only needed when tests or tools are requested.
+	if ! use test && ! use tools; then
+		return
+	fi
+
+	python-any-r1_pkg_setup
+}
+
+src_prepare() {
+ cmake_src_prepare
+
+ sed \
+ -e '/-std=/s/17/20/g' \
+ -i \
+ CMakeLists.txt \
+ python/cutlass/backend/compiler.py \
+ python/cutlass/emit/pytorch.py \
+ python/docs/_modules/cutlass/emit/pytorch.html \
+ test/unit/nvrtc/thread/nvrtc_contraction.cu \
+ test/unit/nvrtc/thread/testbed.h \
+ media/docs/cpp/ide_setup.md \
+ || die
+
+}
+
+src_configure() {
+	# With USE=clang-cuda and a non-clang active compiler, force clang for
+	# CC/CXX; in every other case export the CC/CXX picked by tc-export.
+	if use clang-cuda && ! tc-is-clang ; then
+		export CC="${CHOST}-clang"
+		export CXX="${CHOST}-clang++"
+	else
+		tc-export CXX CC
+	fi
+
+	if use clang-cuda ; then
+		# clang-cuda needs to filter mfpmath
+		filter-mfpmath sse
+		filter-mfpmath i386
+
+		export CUDACXX=clang++
+	fi
+
+	cuda_add_sandbox
+	addpredict "/dev/char/"
+
+	local mycmakeargs=(
+		-DCMAKE_POLICY_DEFAULT_CMP0156="OLD" # cutlass_add_library
+
+		# -DCMAKE_CUDA_COMPILER="$(cuda_get_host_compiler)" # nvcc/clang++
+		-DCMAKE_CUDA_FLAGS="$(cuda_gccdir -f)"
+
+		-DCMAKE_DISABLE_FIND_PACKAGE_Doxygen="$(usex !doc)"
+
+		-DCUTLASS_REVISION="${PVR}"
+		-DCUTLASS_ENABLE_CUBLAS="$(usex cublas)"
+		-DCUTLASS_ENABLE_CUDNN="$(usex cudnn)"
+		-DCUTLASS_ENABLE_EXAMPLES="$(usex examples)"
+		-DCUTLASS_ENABLE_F16C="$(usex cpu_flags_x86_f16c)"
+		-DCUTLASS_ENABLE_GTEST_UNIT_TESTS="$(usex test)"
+		-DCUTLASS_ENABLE_HEADERS_ONLY="$(usex headers-only)"
+		-DCUTLASS_ENABLE_LIBRARY="$(usex !headers-only)"
+		-DCUTLASS_ENABLE_PERFORMANCE="$(usex performance)"
+		-DCUTLASS_ENABLE_PROFILER="$(usex profiler)"
+		# "yes" only when both test and profiler are enabled.
+		-DCUTLASS_ENABLE_PROFILER_UNIT_TESTS="$(usex test "$(usex profiler)")"
+		-DCUTLASS_ENABLE_TESTS="$(usex test)"
+		-DCUTLASS_ENABLE_TOOLS="$(usex tools)"
+		-DCUTLASS_INSTALL_TESTS="no"
+		-DCUTLASS_NVCC_ARCHS="${CUDAARCHS}"
+		-DCUTLASS_UNITY_BUILD_ENABLED="$(usex jumbo-build)"
+		-DCUTLASS_USE_SYSTEM_GOOGLETEST="yes"
+		-DIMPLICIT_CMAKE_CXX_STANDARD="yes"
+	)
+
+	if use doc; then
+		mycmakeargs+=(
+			# Use dot to generate graphs in the doxygen documentation.
+			-DCUTLASS_ENABLE_DOXYGEN_DOT="$(usex dot)"
+		)
+	fi
+
+	if use test; then
+		mycmakeargs+=(
+			-DCUTLASS_TEST_LEVEL="2"
+		)
+	fi
+
+	cmake_src_configure
+}
+
+src_test() {
+	# Same sandbox helper as in src_configure, here invoked with -w,
+	# before handing over to the cmake eclass test phase.
+	cuda_add_sandbox -w
+
+	cmake_src_test
+}
+
+src_install() {
+	cmake_src_install
+
+	# Remove the test directory from the install image; abort if that fails.
+	local test_dir="${ED}/usr/test"
+	rm -r "${test_dir}" || die
+}