commit: cc17fc9c3c6000cdaf75002c5485ea5eb2bd05d7
Author: Alfredo Tupone <tupone <AT> gentoo <DOT> org>
AuthorDate: Fri Nov 21 20:18:11 2025 +0000
Commit: Alfredo Tupone <tupone <AT> gentoo <DOT> org>
CommitDate: Fri Nov 21 20:18:11 2025 +0000
URL: https://gitweb.gentoo.org/repo/gentoo.git/commit/?id=cc17fc9c
sci-ml/tokenizers: drop 0.21.4
Signed-off-by: Alfredo Tupone <tupone <AT> gentoo.org>
sci-ml/tokenizers/Manifest | 3 -
sci-ml/tokenizers/tokenizers-0.21.4.ebuild | 142 -----------------------------
2 files changed, 145 deletions(-)
diff --git a/sci-ml/tokenizers/Manifest b/sci-ml/tokenizers/Manifest
index 886af3585ef0..94c91136ea51 100644
--- a/sci-ml/tokenizers/Manifest
+++ b/sci-ml/tokenizers/Manifest
@@ -1,6 +1,3 @@
-DIST tokenizers-0.21.4-crates.tar.xz 20743760 BLAKE2B ddf6cd0c900c74a0c10232111e600f8a11533ea699c525c2e6a3fa47629c83645893904fb9fd74cb2e2d9ce7211a38807a2346fdd26a251069dcf101dc7c8870 SHA512 99be2a64790df9f17329ccb65243642dd5c9c1ff950be7d6211ef502eb368b05392984bbaca267801b0755aacef552c7b24cfbadb84b1b907fd543702e242d87
-DIST tokenizers-0.21.4.gh.tar.gz 1552701 BLAKE2B 6587f635b5194493f45f9975fe154e1c8bd79c3e0d35db2b868567fe772a5a8ed0a5db4df6be013099e5869d1ca2a19fc5f5e80a0882ecdc859a18506596b683 SHA512 7ed897db3b3edfec740c97311cdab072a90d9a8f637a7fbb23d1595b38e9215c840861531ed0e5284d62a669e1760876ee7f26e0db466a8d404464afd1f37e1a
DIST tokenizers-0.22.0-crates.tar.xz 20740996 BLAKE2B fcd0fcd907ae8e9c9aa86eb31ab8aaef5767e9279692dc2c2218673a4b7be73795ca41f91c662e10bfd31eb91af1199e01f8fde1ceb69c189a9bb0c279196b47 SHA512 30bd9df1975ff6388708057e6266f981a8dc65664aab6dd81fb9f884f3631fd56c2ea8f1ac2872f2c566f8a656cf2f36ef1ee4fcebb2cbd74c6b0a0e1b14c373
DIST tokenizers-0.22.1.gh.tar.gz 1563505 BLAKE2B 798193afeef89a93fdd007d1ffa42113df0ed2d9de92c290a4789ab205cc6c34cf68ba62cb44e50606ec54473f05fbd405b295f9193c1c1f8af845c9ce3104c2 SHA512 1a25ee6b218232112f10bc1e082e71ed1960ab7fa62c7341b38cc5f5dc0a735cf35b3e90d4a1c1cc0aedb7c198182c2e559662459a668a2fef4e633096a1cccc
-DIST tokenizers-python-0.21.4-crates.tar.xz 10997708 BLAKE2B 1e8aa5a1bc5c2592d310a72b0bed8e301ba7e294643425fe872eb300b97280ce6e5a8d895afe0175c1443096e314053c23b43388d16f1c01f51d9926b0be1ef8 SHA512 bc8a978ebb01c784b538013a8656863aae6a1e25b40c7fb9fde878e4351abaf5a8934a903fb60ac4ce802f8f35ebe393f0e2888759b71689f2f2df4f1c88a02d
DIST tokenizers-python-0.22.1-crates.tar.xz 14184544 BLAKE2B 718c7246fb77568c1aa54e168fce682ef3130e2d51f8b6b0ed5dea96f4dabedfc92e395ef083b514732b8d04fed1d5a5ad1ba4a9667eee0e993616bd5f03184d SHA512 1226e024ab7ad7dc06e191951e2ffa8d30fd575d867d949ad37ffd4fe6a7e491b66a2a767e5d0dbb4b55f8b8eff809baceb11950b811e4e5d685074507060e23
diff --git a/sci-ml/tokenizers/tokenizers-0.21.4.ebuild b/sci-ml/tokenizers/tokenizers-0.21.4.ebuild
deleted file mode 100644
index be3136b60950..000000000000
--- a/sci-ml/tokenizers/tokenizers-0.21.4.ebuild
+++ /dev/null
@@ -1,142 +0,0 @@
-# Copyright 2023-2025 Gentoo Authors
-# Distributed under the terms of the GNU General Public License v2
-
-# Autogenerated by pycargoebuild 0.15.0
-
-EAPI=8
-
-DISTUTILS_USE_PEP517=maturin
-PYTHON_COMPAT=( python3_{10..13} )
-DISTUTILS_EXT=1
-DISTUTILS_SINGLE_IMPL=1
-RUST_MIN_VER="1.82.0"
-
-CRATES="
-"
-
-inherit cargo distutils-r1
-
-DESCRIPTION="Implementation of today's most used tokenizers"
-HOMEPAGE="https://github.com/huggingface/tokenizers"
-SRC_URI="
- https://github.com/huggingface/${PN}/archive/refs/tags/v${PV}.tar.gz
- -> ${P}.gh.tar.gz
- ${CARGO_CRATE_URIS}
-"
-if [[ ${PKGBUMPING} != ${PVR} ]]; then
- SRC_URI+="
- https://dev.gentoo.org/~tupone/distfiles/${P}-crates.tar.xz
- https://dev.gentoo.org/~tupone/distfiles/${PN}-python-${PV}-crates.tar.xz
- "
-fi
-
-LICENSE="Apache-2.0"
-# Dependent crate licenses
-LICENSE+="
- Apache-2.0 Apache-2.0-with-LLVM-exceptions BSD-2 BSD ISC MIT MPL-2.0
- Unicode-DFS-2016
-"
-SLOT="0"
-KEYWORDS="~amd64"
-
-RDEPEND="dev-libs/oniguruma"
-BDEPEND="
- test? ( sci-ml/datasets[${PYTHON_SINGLE_USEDEP}] )
- $(python_gen_cond_dep '
- dev-python/setuptools-rust[${PYTHON_USEDEP}]
- ')
-"
-
-distutils_enable_tests pytest
-
-QA_FLAGS_IGNORED=".*/site-packages/tokenizers/.*so"
-
-src_unpack() {
- cargo_src_unpack
-}
-
-pkg_setup() {
- python-single-r1_pkg_setup
- rust_pkg_setup
-}
-
-src_prepare() {
- default
- cd bindings/python
- eapply "${FILESDIR}"/${PN}-0.21.2-test.patch
- distutils-r1_src_prepare
-}
-
-src_configure() {
- cd tokenizers
- cargo_src_configure
- cd ../bindings/python
- distutils-r1_src_configure
-}
-
-src_compile() {
- export RUSTONIG_SYSTEM_LIBONIG=1
- cd tokenizers
- cargo_src_compile
- cd ../bindings/python
- distutils-r1_src_compile
-}
-
-src_test() {
- cd tokenizers
- # Tests do not work
- #cargo_src_test
- cd ../bindings/python
- local -x EPYTEST_IGNORE=( benches/ )
- local -x EPYTEST_DESELECT=(
- tests/bindings/test_encoding.py::TestEncoding::test_sequence_ids
- tests/bindings/test_encoding.py::TestEncoding::test_n_sequences
- tests/bindings/test_encoding.py::TestEncoding::test_word_to_tokens
- tests/bindings/test_encoding.py::TestEncoding::test_word_to_chars
- tests/bindings/test_encoding.py::TestEncoding::test_token_to_sequence
- tests/bindings/test_encoding.py::TestEncoding::test_token_to_chars
- tests/bindings/test_encoding.py::TestEncoding::test_token_to_word
- tests/bindings/test_encoding.py::TestEncoding::test_char_to_token
- tests/bindings/test_encoding.py::TestEncoding::test_char_to_word
- tests/bindings/test_encoding.py::TestEncoding::test_truncation
- tests/bindings/test_encoding.py::TestEncoding::test_invalid_truncate_direction
- tests/bindings/test_models.py::TestBPE::test_instantiate
- tests/bindings/test_models.py::TestWordLevel::test_instantiate
- tests/bindings/test_models.py::TestWordPiece::test_instantiate
- tests/bindings/test_processors.py::TestByteLevelProcessing::test_processing
- tests/bindings/test_trainers.py::TestUnigram::test_continuing_prefix_trainer_mismatch
- tests/bindings/test_trainers.py::TestUnigram::test_train
- tests/bindings/test_trainers.py::TestUnigram::test_train_parallelism_with_custom_pretokenizer
- tests/documentation/test_pipeline.py::TestPipeline::test_pipeline
- tests/documentation/test_pipeline.py::TestPipeline::test_bert_example
- tests/implementations/test_char_bpe.py::TestCharBPETokenizer::test_basic_encode
- tests/implementations/test_char_bpe.py::TestCharBPETokenizer::test_lowercase
- tests/implementations/test_char_bpe.py::TestCharBPETokenizer::test_decoding
- tests/implementations/test_char_bpe.py::TestCharBPETokenizer::test_multiprocessing_with_parallelism
- tests/test_serialization.py::TestSerialization::test_full_serialization_albert
- tests/test_serialization.py::TestSerialization::test_str_big
- tests/bindings/test_tokenizer.py::TestTokenizer::test_encode_formats
- tests/bindings/test_tokenizer.py::TestTokenizer::test_encode_add_special_tokens
- tests/bindings/test_tokenizer.py::TestTokenizer::test_from_pretrained
- tests/bindings/test_tokenizer.py::TestTokenizer::test_from_pretrained_revision
- tests/bindings/test_tokenizer.py::TestTokenizer::test_encode_special_tokens
- tests/bindings/test_tokenizer.py::TestTokenizer::test_splitting
- tests/documentation/test_quicktour.py::TestQuicktour::test_quicktour
- tests/documentation/test_tutorial_train_from_iterators.py::TestTrainFromIterators::test_datasets
- tests/documentation/test_tutorial_train_from_iterators.py::TestTrainFromIterators::test_gzip
- tests/implementations/test_bert_wordpiece.py::TestBertWordPieceTokenizer::test_basic_encode
- tests/implementations/test_bert_wordpiece.py::TestBertWordPieceTokenizer::test_multiprocessing_with_parallelism
- tests/implementations/test_byte_level_bpe.py::TestByteLevelBPE::test_basic_encode
- tests/implementations/test_byte_level_bpe.py::TestByteLevelBPE::test_add_prefix_space
- tests/implementations/test_byte_level_bpe.py::TestByteLevelBPE::test_lowerspace
- tests/implementations/test_byte_level_bpe.py::TestByteLevelBPE::test_multiprocessing_with_parallelism
-
- )
- distutils-r1_src_test
-}
-
-src_install() {
- cd tokenizers
- cd ../bindings/python
- distutils-r1_src_install
-}