commit: a6d51389bf37280164889b1379d29cd5186c6c85
Author: Alfredo Tupone <tupone <AT> gentoo <DOT> org>
AuthorDate: Thu Apr 17 12:13:01 2025 +0000
Commit: Alfredo Tupone <tupone <AT> gentoo <DOT> org>
CommitDate: Thu Apr 17 12:13:24 2025 +0000
URL: https://gitweb.gentoo.org/repo/gentoo.git/commit/?id=a6d51389
sci-ml/datasets: add 3.5.0
Signed-off-by: Alfredo Tupone <tupone <AT> gentoo.org>
sci-ml/datasets/Manifest | 1 +
sci-ml/datasets/datasets-3.5.0.ebuild | 126 ++++++++++++++++++++++++++++++++++
sci-ml/datasets/metadata.xml | 4 ++
3 files changed, 131 insertions(+)
diff --git a/sci-ml/datasets/Manifest b/sci-ml/datasets/Manifest
index bc908dca6594..450673b4be4f 100644
--- a/sci-ml/datasets/Manifest
+++ b/sci-ml/datasets/Manifest
@@ -3,3 +3,4 @@ DIST datasets-3.1.0.gh.tar.gz 1877057 BLAKE2B
818cbf10282f775cda5956ae37607bb535
DIST datasets-3.2.0.gh.tar.gz 1877261 BLAKE2B
0169ee27e1128f8b64fda0e3a4dd63d5d603874cdbf703cd73f100ad1ea69294c300b93cddaa4f2dd8c795570b76adb34e40475d38730f4733679008fb593d84
SHA512
2e381d6919ea7f2e934e832d6bff30a4b23cfe929c0b4345703ce39c7882daf8a234fe71ca89d1d9bcd75000e35845156f7036f9604504ac584feb1b8d3239b1
DIST datasets-3.3.2.gh.tar.gz 1886396 BLAKE2B
bb51d8e929fbadd9d04f7ca6c8894dc06b88940643f02743b10cea3c2dd5b91b720358d2229a2263e236a6b4911d9dc60e8698da4369393a63f2603274a4f599
SHA512
4c6c4a4e748b5018574c0968929d1dac2aee96bc6f964dedf54c63e872808ca4a4958ed59e0240c958f352ca7a8d333de7ac44095e7c9525e2d9e703afbaeabf
DIST datasets-3.4.1.gh.tar.gz 1888651 BLAKE2B
836f0e0bb55cefd5a0616785c87c867292cc46ec609934b4f7688462aa7ec95b248f06018cd99e68a88b4e179e0aa48dd601a2fc74bc1b809c336bd70ae0bd6a
SHA512
c3654fec241b5da1fb2c9349ff2d0f309e184e5158a0bd19ca0477f1bfb7570d9268e93d1828e1648132563c04ba62ec4eabead2f29d8ef1a29b593aec1c62b1
+DIST datasets-3.5.0.gh.tar.gz 1906212 BLAKE2B 2c26b907230eb9256a6056878dc80eae3296869f24597298f262d49a5fc0a31dbcd6ebb8dfaca9cf27faece019a4f39ab0dd07db772659d414e1d3f5ecfe2258 SHA512 ca053187b6b80140136991e5b220136cf22f3cd98d073323e0a72d77211248d52986aa49b0e595759ba023b4551a9dc01ab1cac853c855f032acbd9a023ecd5a
diff --git a/sci-ml/datasets/datasets-3.5.0.ebuild b/sci-ml/datasets/datasets-3.5.0.ebuild
new file mode 100644
index 000000000000..98f3234c789b
--- /dev/null
+++ b/sci-ml/datasets/datasets-3.5.0.ebuild
@@ -0,0 +1,126 @@
+# Copyright 2023-2025 Gentoo Authors
+# Distributed under the terms of the GNU General Public License v2
+
+EAPI=8
+
+DISTUTILS_USE_PEP517=setuptools
+PYTHON_COMPAT=( python3_{11..13} )
+DISTUTILS_SINGLE_IMPL=1
+inherit distutils-r1
+
+DESCRIPTION="Access and share datasets for Audio, Computer Vision, and NLP tasks"
+HOMEPAGE="https://pypi.org/project/datasets/"
+SRC_URI="https://github.com/huggingface/${PN}/archive/refs/tags/${PV}.tar.gz
+ -> ${P}.gh.tar.gz"
+
+LICENSE="Apache-2.0"
+SLOT="0"
+KEYWORDS="~amd64"
+IUSE="torch vision"
+REQUIRED_USE="test? ( torch vision )"
+
+RDEPEND="
+ sci-ml/huggingface_hub[${PYTHON_SINGLE_USEDEP}]
+ $(python_gen_cond_dep '
+ dev-python/aiohttp[${PYTHON_USEDEP}]
+ dev-python/dill[${PYTHON_USEDEP}]
+ dev-python/filelock[${PYTHON_USEDEP}]
+ dev-python/fsspec[${PYTHON_USEDEP}]
+ dev-python/multiprocess[${PYTHON_USEDEP}]
+ dev-python/numpy[${PYTHON_USEDEP}]
+ dev-python/packaging[${PYTHON_USEDEP}]
+ dev-python/pandas[${PYTHON_USEDEP}]
+ dev-python/pyarrow[${PYTHON_USEDEP},parquet,snappy]
+ dev-python/pyyaml[${PYTHON_USEDEP}]
+ dev-python/requests[${PYTHON_USEDEP}]
+ dev-python/tqdm[${PYTHON_USEDEP}]
+ dev-python/xxhash[${PYTHON_USEDEP}]
+ vision? (
+ dev-python/pillow[${PYTHON_USEDEP}]
+ )
+ ')
+ torch? (
+ sci-ml/caffe2[${PYTHON_SINGLE_USEDEP},numpy]
+ sci-ml/pytorch[${PYTHON_SINGLE_USEDEP}]
+ )
+"
+DEPEND="${RDEPEND}"
+# Missing test dependencies:
+# joblib
+# joblibspark
+# faiss-cpu
+# jax
+# jaxlib
+# polars
+# pyav
+# pyspark
+# py7zr
+# s3fs
+# tensorflow
+# tiktoken
+# torchdata
+# transformers
+BDEPEND="test? (
+ sci-ml/torchvision[${PYTHON_SINGLE_USEDEP}]
+ $(python_gen_cond_dep '
+ dev-python/absl-py[${PYTHON_USEDEP}]
+ dev-python/decorator[${PYTHON_USEDEP}]
+ dev-python/elasticsearch[${PYTHON_USEDEP}]
+ dev-python/lz4[${PYTHON_USEDEP}]
+ dev-python/moto[${PYTHON_USEDEP}]
+ dev-python/protobuf:=[${PYTHON_USEDEP}]
+ dev-python/pytest-datadir[${PYTHON_USEDEP}]
+ dev-python/pytest-xdist[${PYTHON_USEDEP}]
+ dev-python/soundfile[${PYTHON_USEDEP}]
+ dev-python/sqlalchemy[${PYTHON_USEDEP}]
+ dev-python/zstandard[${PYTHON_USEDEP}]
+ ')
+)"
+
+distutils_enable_tests pytest
+
+src_test() {
+ local EPYTEST_IGNORE=(
+ tests/features/test_audio.py
+ tests/packaged_modules/test_audiofolder.py
+ tests/packaged_modules/test_spark.py
+ tests/test_fingerprint.py
+ tests/test_iterable_dataset.py
+ tests/test_inspect.py
+ tests/test_load.py
+ tests/test_upstream_hub.py
+ )
+
+ local EPYTEST_DESELECT=(
+ tests/commands/test_test.py::test_test_command
+ tests/features/test_video.py::test_video_feature_encode_example
+ tests/features/test_video.py::test_dataset_with_video_feature
+ tests/features/test_video.py::test_dataset_with_video_map_and_formatted
+ tests/io/test_parquet.py::test_parquet_read_geoparquet
+ tests/packaged_modules/test_cache.py::test_cache_multi_configs
+ tests/packaged_modules/test_cache.py::test_cache_single_config
+ tests/test_arrow_dataset.py::BaseDatasetTest::test_filter_caching_on_disk
+ tests/test_arrow_dataset.py::BaseDatasetTest::test_map_caching_on_disk
+ tests/test_distributed.py::test_torch_distributed_run
+ tests/test_file_utils.py::TestxPath::test_xpath_rglob
+ tests/test_file_utils.py::TestxPath::test_xpath_glob
+ tests/test_file_utils.py::test_xexists_private
+ tests/test_file_utils.py::test_xlistdir_private
+ tests/test_file_utils.py::test_xisdir_private
+ tests/test_file_utils.py::test_xisfile_private
+ tests/test_file_utils.py::test_xgetsize_private
+ tests/test_file_utils.py::test_xglob_private
+ tests/test_file_utils.py::test_xwalk_private
+ tests/test_hub.py::test_convert_to_parquet
+ tests/packaged_modules/test_cache.py::test_cache_capital_letters
+ tests/packaged_modules/test_folder_based_builder.py::test_data_files_with_different_levels_no_metadata
+ tests/packaged_modules/test_folder_based_builder.py::test_data_files_with_one_label_no_metadata
+ tests/test_data_files.py::test_DataFilesList_from_patterns_locally_with_extra_files
+ tests/test_data_files.py::test_DataFilesDict_from_patterns_locally_or_remote_hashing
+ tests/test_file_utils.py::test_xopen_remote
+ tests/test_hub.py::test_delete_from_hub
+ tests/test_offline_util.py::test_offline_with_timeout
+ tests/test_search.py::ElasticSearchIndexTest::test_elasticsearch
+ )
+ distutils-r1_src_test
+}
diff --git a/sci-ml/datasets/metadata.xml b/sci-ml/datasets/metadata.xml
index 94c112402049..f7e5d145210a 100644
--- a/sci-ml/datasets/metadata.xml
+++ b/sci-ml/datasets/metadata.xml
@@ -5,6 +5,10 @@
<email>tupone@gentoo.org</email>
<name>Tupone Alfredo</name>
</maintainer>
+ <use>
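+ <flag name="torch">Enable PyTorch integration (pulls in <pkg>sci-ml/pytorch</pkg>)</flag>
+ <flag name="vision">Enable vision/image support (pulls in <pkg>dev-python/pillow</pkg>)</flag>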
+ </use>
<upstream>
<remote-id type="github">huggingface/datasets</remote-id>
<remote-id type="pypi">datasets</remote-id>