commit:     468f1780ee80239c9969cedb5533563d57e433d3
Author:     Sergey Alirzaev <l29ah <AT> riseup <DOT> net>
AuthorDate: Fri Jan  2 19:06:31 2026 +0000
Commit:     Sergey Alirzaev <zl29ah <AT> gmail <DOT> com>
CommitDate: Fri Jan  2 19:06:31 2026 +0000
URL:        https://gitweb.gentoo.org/repo/proj/guru.git/commit/?id=468f1780

sci-misc/llama-cpp: add 0_pre7611, drop 0_pre5633-r1

Prompt processing is now very fast, at least with Vulkan on GLM 4.5 Air.
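
A rough way to verify this, assuming USE=vulkan and a local GGUF copy of the
model (the path below is a placeholder); llama-bench is built as part of the
package and reports prompt-processing (pp) and text-generation (tg) throughput:

    llama-bench -m /path/to/GLM-4.5-Air.gguf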

Signed-off-by: Sergey Alirzaev <l29ah <AT> riseup.net>

 sci-misc/llama-cpp/Manifest                        |  2 +-
 ...re5633-r1.ebuild => llama-cpp-0_pre7611.ebuild} | 79 ++++++++++++++--------
 2 files changed, 53 insertions(+), 28 deletions(-)

diff --git a/sci-misc/llama-cpp/Manifest b/sci-misc/llama-cpp/Manifest
index 0f81ecfa2b..2e4152f192 100644
--- a/sci-misc/llama-cpp/Manifest
+++ b/sci-misc/llama-cpp/Manifest
@@ -1,6 +1,6 @@
-DIST llama-cpp-0_pre5633.tar.gz 24986657 BLAKE2B 6215dbfea54cb23a57419cc5a530be5622ec834c6d005337bcf92c50e152979375592088e215845e8f07c6b3f7eec15132cd15ebf9b0725adabe499951ae4735 SHA512 11a1917eb86c7065ea901cb62bdc7a25d8d7b962358570c2c7ae0c2d7abce6d19ebc6af74512593ebafbb4ee23546128cf8bfee5ba769c4f3cd2e254cdc1a1a4
 DIST llama-cpp-0_pre6140.tar.gz 25487270 BLAKE2B 48b809c860437a8eb8b886e417299c2135dcd3c74aa2782b0380b0785b4e8c383c6061bd7ac43e9eddb1e31769ccceb48811a5433d45ac41adb00ece0e6af93d SHA512 d8e904ddd6b935f84230ee163414922f572e71e2dd05d639a5b1a4a36c6841ac476f81be8e39f087712766e88564d8a499cdff4d717a5f910caf9f799a2b998d
 DIST llama-cpp-0_pre6318.tar.gz 25626090 BLAKE2B b95826a5fd4ab27927d390cdc091648d1ffe281d5d9946fdfa4e6c8c59fb7461dd1e2b83751c86c575b4f00207bbd0cfbe467a0ae9dfdb3b192356bc77e0f808 SHA512 f3b5655123919a76fa27f1be05ffb2a7f681d7793d4d9e24106739a21846a2918ffdf9ef326ac99a55f6b4943059e4f76de754da894ff6fdd7e2d56a41edc56b
 DIST llama-cpp-0_pre6710.tar.gz 25894417 BLAKE2B 147f30d76fd49bf18fa0ab9e3e75d0ad337dcd87a73f1dbce43f180488ea06b40b1a2a93b4686a88b5a442dd4dd6a8e45bf848ceb549bdc0ad0078427336c56e SHA512 75c5918713256cb11f704b94d6e249a9f3ac2dde1107a6f4506134ba9c772e1c42d991915b571887207003f4b0679a183cd0787ffd742a08d2283fdfb86695eb
 DIST llama-cpp-0_pre6980.tar.gz 26431911 BLAKE2B b7d7c0dcdabde01acb816e73bc344564823dd1fc498fb98bf3c611b2d7a964af4d94f7cad533fe675a30685d510829160e392ab0f3bd16f4757a2f3446b8e3ca SHA512 33e63336ad7c0fc653acd409d9314ce3fc3755ed1c03b4806c647b7c80d91b3c883aec6633334555c3855a24276d4975a54c96af91df8d2f818d4dd1dbcbabfb
 DIST llama-cpp-0_pre7276.tar.gz 27765814 BLAKE2B d0553ab1dd29c9d93a18c6217aab4553faf09e385a94b90732a537bbcf9bded54d5cda28553543e2c0cc71b6a157bfb80a48405f3f8281c51525757967b33e16 SHA512 3035fe53fea2ca3b0f35e479f4eaec75e38a2ea670600445776cd6fa696fc83ca19eb6dd7cd2ab1da69e78293c62318b5182e6e5b3423ae6c1f00854c5132a4c
+DIST llama-cpp-0_pre7611.tar.gz 28622786 BLAKE2B 3c345645c9bcf07d8a513b9e883619b31b5254581f73429d638403758429fd2dfc5f78a22d538e8d88eb6c1be74bf805481af697480727ed750492ddec5c37fe SHA512 c6c4780d7e68adfc385b57c6f7530423f8205bfa283572b0d414d55e143c03307e98676e41ad527c37d7837f831f8ff24be0f7bf59e366ea82f3802cdc946821

diff --git a/sci-misc/llama-cpp/llama-cpp-0_pre5633-r1.ebuild b/sci-misc/llama-cpp/llama-cpp-0_pre7611.ebuild
similarity index 53%
rename from sci-misc/llama-cpp/llama-cpp-0_pre5633-r1.ebuild
rename to sci-misc/llama-cpp/llama-cpp-0_pre7611.ebuild
index e85f09260c..a81045e6d7 100644
--- a/sci-misc/llama-cpp/llama-cpp-0_pre5633-r1.ebuild
+++ b/sci-misc/llama-cpp/llama-cpp-0_pre7611.ebuild
@@ -5,7 +5,7 @@ EAPI=8
 
 ROCM_VERSION="6.3"
 
-inherit cmake cuda rocm
+inherit cmake cuda rocm linux-info
 
 if [[ "${PV}" != "9999" ]]; then
        KEYWORDS="~amd64"
@@ -23,42 +23,44 @@ HOMEPAGE="https://github.com/ggml-org/llama.cpp"
 LICENSE="MIT"
 SLOT="0"
 CPU_FLAGS_X86=( avx avx2 f16c )
-IUSE="curl openblas blis hip cuda"
-REQUIRED_USE="?? ( openblas blis )"
-
-AMDGPU_TARGETS_COMPAT=(
-       gfx900
-       gfx90c
-       gfx902
-       gfx1010
-       gfx1011
-       gfx1012
-       gfx1030
-       gfx1031
-       gfx1032
-       gfx1034
-       gfx1035
-       gfx1036
-       gfx1100
-       gfx1101
-       gfx1102
-       gfx1103
-       gfx1150
-       gfx1151
-)
+IUSE="curl openblas +openmp blis hip cuda opencl vulkan flexiblas"
+REQUIRED_USE="?? ( openblas blis flexiblas )"
 
 # curl is needed for pulling models from huggingface
 # numpy is used by convert_hf_to_gguf.py
-DEPEND="
+CDEPEND="
        curl? ( net-misc/curl:= )
        openblas? ( sci-libs/openblas:= )
+       openmp? ( llvm-runtimes/openmp:= )
        blis? ( sci-libs/blis:= )
-       hip? (  >=dev-util/hip-6.3:= )
+       flexiblas? ( sci-libs/flexiblas:= )
+       hip? ( >=dev-util/hip-6.3:=
+               >=sci-libs/hipBLAS-6.3:=
+       )
        cuda? ( dev-util/nvidia-cuda-toolkit:= )
 "
-RDEPEND="${DEPEND}
+DEPEND="${CDEPEND}
+       opencl? ( dev-util/opencl-headers )
+       vulkan? ( dev-util/vulkan-headers )
+"
+RDEPEND="${CDEPEND}
        dev-python/numpy
+       opencl? ( dev-libs/opencl-icd-loader )
+       vulkan? ( media-libs/vulkan-loader )
 "
+BDEPEND="media-libs/shaderc"
+
+pkg_setup() {
+       if use hip; then
+               linux-info_pkg_setup
+               if linux-info_get_any_version && linux_config_exists; then
+                       if ! linux_chkconfig_present HSA_AMD_SVM; then
+                               ewarn "To use ROCm/HIP, you need to have the HSA_AMD_SVM option enabled in your kernel."
+                       fi
+               fi
+       fi
+}
 
 src_prepare() {
        use cuda && cuda_src_prepare
@@ -77,6 +79,13 @@ src_configure() {
                -DBUILD_NUMBER="1"
                -DGENTOO_REMOVE_CMAKE_BLAS_HACK=ON
                -DGGML_CUDA=$(usex cuda ON OFF)
+               -DGGML_OPENCL=$(usex opencl ON OFF)
+               -DGGML_OPENMP=$(usex openmp ON OFF)
+               -DGGML_VULKAN=$(usex vulkan ON OFF)
+
+               # avoid clashing with whisper.cpp
+               -DCMAKE_INSTALL_LIBDIR="${EPREFIX}/usr/$(get_libdir)/llama.cpp"
+               -DCMAKE_INSTALL_RPATH="${EPREFIX}/usr/$(get_libdir)/llama.cpp"
        )
 
        if use openblas ; then
@@ -91,6 +100,19 @@ src_configure() {
                )
        fi
 
+       if use flexiblas; then
+               mycmakeargs+=(
+                       -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=FlexiBLAS
+               )
+       fi
+
+       if use cuda; then
+               local -x CUDAHOSTCXX="$(cuda_gccdir)"
+               # tries to recreate dev symlinks
+               cuda_add_sandbox
+               addpredict "/dev/char/"
+       fi
+
        if use hip; then
                rocm_use_hipcc
                mycmakeargs+=(
@@ -104,4 +126,7 @@ src_configure() {
 src_install() {
        cmake_src_install
        dobin "${BUILD_DIR}/bin/rpc-server"
+
+       # avoid clashing with whisper.cpp
+       rm -rf "${ED}/usr/include"
 }
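
A few notes on the changes above, with hedged examples.

The curl dependency enables pulling models from huggingface at runtime; a
sketch (the model name is an illustrative placeholder):

    llama-cli -hf ggml-org/gemma-3-1b-it-GGUF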

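The pkg_setup hook only warns when HSA_AMD_SVM is missing. To check the option
by hand on a kernel built with CONFIG_IKCONFIG_PROC (an assumption, not part
of the ebuild):

    zgrep CONFIG_HSA_AMD_SVM /proc/config.gz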
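
On the install paths: llama.cpp and whisper.cpp both ship libggml, so the
libraries land in a private /usr/$(get_libdir)/llama.cpp directory and the
matching CMAKE_INSTALL_RPATH lets the installed tools locate them without an
ld.so.conf entry. One way to confirm, assuming llama-cli ends up in /usr/bin
(a sketch):

    readelf -d /usr/bin/llama-cli | grep -E 'RPATH|RUNPATH'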