commit: 3db5a7425da176948a8378b1d9d671f1ed4544c0
Author: Michael Mair-Keimberger <mm1ke <AT> gentoo <DOT> org>
AuthorDate: Tue Nov 25 20:47:43 2025 +0000
Commit: Michael Mair-Keimberger <mm1ke <AT> gentoo <DOT> org>
CommitDate: Tue Nov 25 20:47:43 2025 +0000
URL: https://gitweb.gentoo.org/repo/gentoo.git/commit/?id=3db5a742
sci-ml/tokenizers: remove unused patch
Signed-off-by: Michael Mair-Keimberger <mm1ke <AT> gentoo.org>
.../tokenizers/files/tokenizers-0.15.2-test.patch | 39 ----------------------
1 file changed, 39 deletions(-)
diff --git a/sci-ml/tokenizers/files/tokenizers-0.15.2-test.patch b/sci-ml/tokenizers/files/tokenizers-0.15.2-test.patch
deleted file mode 100644
index 01a872cb846a..000000000000
--- a/sci-ml/tokenizers/files/tokenizers-0.15.2-test.patch
+++ /dev/null
@@ -1,39 +0,0 @@
---- a/tests/bindings/test_trainers.py 2024-04-07 18:21:19.443506351 +0200
-+++ b/tests/bindings/test_trainers.py 2024-04-07 18:21:54.893466083 +0200
-@@ -295,8 +295,8 @@
- tokenizer.pre_tokenizer = pre_tokenizers.Sequence(
- [pre_tokenizers.Whitespace(), pre_tokenizers.Digits(individual_digits=True)]
- )
-- tokenizer.train(files=["data/big.txt"], trainer=trainer)
-+ tokenizer.train(files=["tests/data/big.txt"], trainer=trainer)
-
-- tokenizer.save("data/tokenizer.json")
-+ tokenizer.save("tests/data/tokenizer.json")
-
-- tokenizer.from_file("data/tokenizer.json")
-+ tokenizer.from_file("tests/data/tokenizer.json")
---- a/tests/documentation/test_tutorial_train_from_iterators.py 2024-04-07 18:19:08.653593406 +0200
-+++ b/tests/documentation/test_tutorial_train_from_iterators.py 2024-04-07 18:19:39.206906910 +0200
-@@ -40,7 +40,7 @@
- def setup_gzip_files(self, train_files):
- with open(train_files["small"], "rt") as small:
- for n in range(3):
-- path = f"data/my-file.{n}.gz"
-+ path = f"tests/data/my-file.{n}.gz"
- with gzip.open(path, "wt") as f:
- f.write(small.read())
-
-@@ -87,11 +87,11 @@
- # START single_gzip
- import gzip
-
-- with gzip.open("data/my-file.0.gz", "rt") as f:
-+ with gzip.open("tests/data/my-file.0.gz", "rt") as f:
- tokenizer.train_from_iterator(f, trainer=trainer)
- # END single_gzip
- # START multi_gzip
-- files = ["data/my-file.0.gz", "data/my-file.1.gz",
"data/my-file.2.gz"]
-+ files = ["tests/data/my-file.0.gz", "tests/data/my-file.1.gz",
"tests/data/my-file.2.gz"]
-
- def gzip_iterator():
- for path in files:
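For reference, the removed patch rewrote hardcoded test-data paths from data/ to tests/data/ so the upstream test suite resolved its fixtures when run from the repository root. A minimal sketch of the corrected flow, assuming the HuggingFace tokenizers Python API; the BPE model and trainer below are illustrative stand-ins, since the upstream tests construct their own:

from tokenizers import Tokenizer, models, pre_tokenizers, trainers

tokenizer = Tokenizer(models.BPE())  # hypothetical model choice for illustration
trainer = trainers.BpeTrainer()      # hypothetical trainer; upstream tests build their own

# Split on whitespace, then isolate each digit, as in the upstream test.
tokenizer.pre_tokenizer = pre_tokenizers.Sequence(
    [pre_tokenizers.Whitespace(), pre_tokenizers.Digits(individual_digits=True)]
)

# The patch prefixed these paths with "tests/" so they resolve when the
# suite runs from the repository root rather than from within tests/.
tokenizer.train(files=["tests/data/big.txt"], trainer=trainer)
tokenizer.save("tests/data/tokenizer.json")
tokenizer = Tokenizer.from_file("tests/data/tokenizer.json")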