commit:     8fc9ac325105fab5a44261fd4874d87738d31c43
Author:     Michał Górny <mgorny <AT> gentoo <DOT> org>
AuthorDate: Sat Oct 23 20:37:15 2021 +0000
Commit:     Michał Górny <mgorny <AT> gentoo <DOT> org>
CommitDate: Sat Oct 23 20:46:04 2021 +0000
URL:        https://gitweb.gentoo.org/repo/gentoo.git/commit/?id=8fc9ac32

dev-python/nltk-data: Update data files for 20211023

Closes: https://bugs.gentoo.org/819780
Signed-off-by: Michał Górny <mgorny <AT> gentoo.org>

 dev-python/nltk-data/Manifest                      |  3 +-
 ...0200312-r1.ebuild => nltk-data-20211023.ebuild} | 61 ++++++++++++++--------
 2 files changed, 42 insertions(+), 22 deletions(-)

diff --git a/dev-python/nltk-data/Manifest b/dev-python/nltk-data/Manifest
index fa415ee7f30..0a6f8624fba 100644
--- a/dev-python/nltk-data/Manifest
+++ b/dev-python/nltk-data/Manifest
@@ -81,7 +81,7 @@ DIST nltk-smultron-20200312.zip 166207 BLAKE2B 
d0c3e75dd108965e260d913e0c02137da
 DIST nltk-snowball_data-20200312.zip 6785405 BLAKE2B 
44c10439b142540ac7eece967efa1431fd8f45342f0a90875dacf29ad374fe4c7d30af11d42ba45e0f1ec1836d56b2ff684ee352c5e8536cfb5db5eb7632285b
 SHA512 
6c8a9259d88f6f7f499867d83b731de99d7fa4e8827ecedf836f653fc1a810efa9f6c5c6e2720a9e6610bc00978956b6a119bd08b70e3e241c4e9faccddd81d8
 DIST nltk-spanish_grammars-20200312.zip 4047 BLAKE2B 
d8a8dbb558850a6a60f1fe5ab0f617f3a0f3c64bc7d49980cf793d374c6679d1bd42afb7e61776737b5eec162f2520abf2ee3acc92ea9ee0f397c3089b3b5b28
 SHA512 
4513347156e9351c259c0e2448198d68354bbc95e0a54561c31a88f13f333ebcba3e294c820cb62036665f2904eb6a7137546cd580e361c0423c30a8aed950b2
 DIST nltk-state_union-20200312.zip 808757 BLAKE2B 
2d352af0ced736d3c11a821eaba0b035b3b5b6b0f20db3bd5d4ac2451f99daf68bffd3ad984bd404ecc4a1e67ca9281c529af2cc9e295a3a7330f36ee9640bc8
 SHA512 
a6fc83e6230e57ba66a7af62f0d2a5d44a14530ed1e0e914590b3f8b5bf939967c126a5e6f6899ba9134843893f65212e836d311109393c1200191a5c3163485
-DIST nltk-stopwords-20200312.zip 23047 BLAKE2B a0677cc0d4a3d54ee6e27eab8fb7635d6cd29265204896870e57457a54459f1d6cabc0c4e76e749397f5eee299eed0b524fcbc2033ea17d81cdb6cd98c5ed968 SHA512 31774fd3db2e0fba0209db71c08f9b2d971311ea4a59739cfdc0a9ae34f6c6c593f2a991a14ee20b0de8b380215e609f8398ed50c546775322ab8c4c3f8d06dd
+DIST nltk-stopwords-20211023.zip 26220 BLAKE2B 50219508c5fb24c1392064ea0546ca9060829f51689c0d626bb99e3fa8f712df98ed475fc0d27d99f934fb156ce65df91a8e7c22e1b4f16833339fb0aea34a9e SHA512 8308623953560281288b64e695638ca3fa28e1b6201b538d01650d6dfd08e821687217c8d012e93adfa2a48afebdda11af1bb86d638358c2931f36754d1e15ef
 DIST nltk-subjectivity-20200312.zip 521628 BLAKE2B 
0a8777a5b91b1b825fbde41cc927d496480129f0a810349bddde2036ed16f37611f2bc3b007e74fe36523612529a92433d32d094be72d247f5faef8220c3c491
 SHA512 
a3cc4d2d20f26c5eabd86fbced2c013e69d46e607013278eb35831a62e57523a17aec1b580ba62c7a867e61a561e1b222d8430f0c1e2d429a9479e12b008c5b7
 DIST nltk-swadesh-20200312.zip 22828 BLAKE2B 
1cd9d5355c6b53694ec545ca001b0807b4912a7878ba075b0f81ce8b9b22a5c7a18cf52cf2449483a1c89cd20d8d86986dc3d827fa93a7ef5824fddbc0922025
 SHA512 
90cb32532a5378d05ce34b84b5f8363dbb32f24afac58b0dcc5cdccba98fd7d37def7f4fbe76b11c8c64059bd19df745562bfbf5f4c721d65ce9f4be1348ab76
 DIST nltk-switchboard-20200312.zip 791161 BLAKE2B 
211116a751ae246fa31b6aca96b396d3642d89ad112588a09f8d91a5b76dc41c7fb4d36c16c6358cd8e0da8056bc83598ed0dc635cf7b1fd8469a0e80b5f1761
 SHA512 
690e5392dc082c4ac550bde2848aa65117e7a25cbc4bce0887581c531d03be64e21f044ac0a3286648255f0edd7766b1161f5575ad5fd680c7303b34c3226b8f
@@ -102,6 +102,7 @@ DIST nltk-webtext-20200312.zip 646297 BLAKE2B 
ca072fc38c144b659c76c36c9161641c91
 DIST nltk-wmt15_eval-20200312.zip 383096 BLAKE2B 
119943db4240171077569b3302c678644c2c9547ba67bfd055751059e0a3ad3ab6a19e4eedf9108d313d46dcd36cf19e11d973981da8c70a01c4cb790a7bd739
 SHA512 
362395d4c77ebe92f4c19fa8c2000082dc7a2343acc19cccb596ca00db6c40c231b904d807f46e2691cc4c4a0c79d14873b2a1983a494f2ca1485d540d787ceb
 DIST nltk-word2vec_sample-20200312.zip 49396025 BLAKE2B 
0512b9bb7121a528190079f578e82b4e8f8021bfc0062cfa5613d260f3eee17460aecfeaacb65d950e79d27a653c78633a88c3638ec16377e2dbc3006387ebaa
 SHA512 
09c30a4ab8f9fb6a5b36974b5953260d01cb4f285827fb90a374d054ad775ac978602ab56c452f46f4f8601312e232fd739d2f54dafa44ab8b7b01831cf0d9d1
 DIST nltk-wordnet-20200312.zip 10775600 BLAKE2B 
dac56a8fb1fa6882b1871c394ad2acb2d3be739c424570e27c89fb6983df5f896a8f359092ba82752ddfc0531d83563a219e85f80124202f29bda93181efe4dc
 SHA512 
1923a8bcd56fa0b9a9de91f53070dce28c3a7efbab11d2ef55c87134b1bf30de0f40abab59c39eb15dce54aec9491d8a5a259de212ff4cb25cde0ad09317009a
+DIST nltk-wordnet31-20211023.zip 11055271 BLAKE2B e41a1951af5a71c9506d1e948b860574c94ab0ef31c1789a7e7bfb29c6dccea5b1d8895007631f7b595e9f90306365b5042e7a80dc6e1364fdbf4a5f0cba3b28 SHA512 a86091bd55e3a706892550b232be8f5199092623f1f8305d8c9be967a8527fe7d4ecb6250c369b229fdf52b6f3008106b758adc355fa2ad08b5b0cf2a458c173
 DIST nltk-wordnet_ic-20200312.zip 12056682 BLAKE2B 
c2dc2a646015b23699a72f636b588ec5718c70e6941d9d56863257e1e0396c8cf59ac1dc6ed74e5d7f0c2ee9129d63221a03967bf66a3d335e99160f295ed44d
 SHA512 
1c94451a13af6c76bff60a0cab2e70402a3d9abd2e8fc62a5473f24ab4229feb0afe4faa8d389734697a6cf86d2c8b1dc700bb3afa3cbc279b75d7e0ec19fc6d
 DIST nltk-words-20200312.zip 757777 BLAKE2B 
eaaaaab6c26e206e9b6ce45daf779e3cc6706a06132afeabf013026d0009caee2d678f3c4ea9125b9654f7143bef29ec7a5706b79e5650ea556c6821b7754e6f
 SHA512 
2810f05d3fc7ee6b6f8636fa1ff7b4e8c8cdac12b415cc54d15c69102290122ea138ec4fa36cb483f790c1ac10b0f83ae4c2c3e0e8df7e67e90e962ee5dbb0be
 DIST nltk-ycoe-20200312.zip 477 BLAKE2B 
574835aa011a06a06363e26facd6a6f583a1dc1cac2de39adff59d8ab48eefac030b43d935a2f79af855259f2a9a571193dae2811589483af97406ff05c76c9e
 SHA512 
e39ce165074d10ff63cb84ea52905d7ecb937797c8123ed113c5609afe1f63ac44d04d48a681002c4eac21dc9076ac74164b886c6f9ce42f3a102c38d1e8e756

diff --git a/dev-python/nltk-data/nltk-data-20200312-r1.ebuild b/dev-python/nltk-data/nltk-data-20211023.ebuild
similarity index 80%
rename from dev-python/nltk-data/nltk-data-20200312-r1.ebuild
rename to dev-python/nltk-data/nltk-data-20211023.ebuild
index 4a3d58c5db8..df8437c785b 100644
--- a/dev-python/nltk-data/nltk-data-20200312-r1.ebuild
+++ b/dev-python/nltk-data/nltk-data-20211023.ebuild
@@ -1,4 +1,4 @@
-# Copyright 2020 Gentoo Authors
+# Copyright 2020-2021 Gentoo Authors
 # Distributed under the terms of the GNU General Public License v2
 
 EAPI=7
@@ -18,7 +18,7 @@ RESTRICT="bindist mirror"
 
 BDEPEND="app-arch/unzip"
 
-PACKAGES_ZIP=(
+PACKAGES_ZIP_2020=(
        # wget -O - https://www.nltk.org/nltk_data/ | xml sel -t -m '//package[@unzip=0]' -v @subdir -o "/" -v @id -n - | sort
        corpora/comtrans
        corpora/conll2007
@@ -36,7 +36,7 @@ PACKAGES_ZIP=(
        stemmers/snowball_data
 )
 
-PACKAGES_UNPACK=(
+PACKAGES_UNPACK_2020=(
        # wget -O - https://www.nltk.org/nltk_data/ | xml sel -t -m '//package[@unzip=1]' -v @subdir -o "/" -v @id -n - | sort
        corpora/abc
        corpora/alpino
@@ -85,7 +85,6 @@ PACKAGES_UNPACK=(
        corpora/shakespeare
        corpora/sinica_treebank
        corpora/state_union
-       corpora/stopwords
        corpora/subjectivity
        corpora/swadesh
        corpora/switchboard
@@ -116,7 +115,12 @@ PACKAGES_UNPACK=(
        tokenizers/punkt
 )
 
-PACKAGES_UNPACK_EXTRA=(
+PACKAGES_UNPACK_2021=(
+       corpora/stopwords
+       corpora/wordnet31
+)
+
+PACKAGES_UNPACK_EXTRA_2020=(
        chunkers/maxent_ne_chunker
        corpora/biocreative_ppi
        corpora/brown_tei
@@ -137,48 +141,63 @@ PACKAGES_UNPACK_EXTRA=(
 )
 
 add_data() {
-       local x
+       local x version=${1}
+       shift
+
        for x; do
                SRC_URI+="
                        https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/${x}.zip
-                               -> nltk-${x#*/}-${PV}.zip"
+                               -> nltk-${x#*/}-${version}.zip"
        done
 }
 
-add_data "${PACKAGES_ZIP[@]}" "${PACKAGES_UNPACK[@]}"
+add_data 20200312 "${PACKAGES_ZIP_2020[@]}" "${PACKAGES_UNPACK_2020[@]}"
+add_data 20211023 "${PACKAGES_UNPACK_2021[@]}"
 SRC_URI+="
        extra? ("
-add_data "${PACKAGES_UNPACK_EXTRA[@]}"
+add_data 20200312 "${PACKAGES_UNPACK_EXTRA_2020[@]}"
 SRC_URI+="
        )"
 
 CHECKREQS_DISK_USR=3G
 CHECKREQS_DISK_BUILD=${CHECKREQS_DISK_USR}
 
-src_unpack() {
-       local x
-       local to_unpack=( "${PACKAGES_UNPACK[@]}" )
-       use extra && to_unpack+=( "${PACKAGES_UNPACK_EXTRA[@]}" )
-       for x in "${to_unpack[@]}"; do
+unpack_data() {
+       local x version=${1}
+       shift
+
+       for x; do
                local cat=${x%/*}
                local pkg=${x#*/}
 
                mkdir -p "${S}/${cat}" || die
                cd "${S}/${cat}" || die
-               unpack "nltk-${pkg}-${PV}.zip"
+               unpack "nltk-${pkg}-${version}.zip"
        done
 }
 
-src_install() {
-       dodir /usr/share/nltk_data
-       mv * "${ED}/usr/share/nltk_data/" || die
+src_unpack() {
+       unpack_data 20200312 "${PACKAGES_UNPACK_2020[@]}"
+       unpack_data 20211023 "${PACKAGES_UNPACK_2021[@]}"
+       use extra && unpack_data 20200312 "${PACKAGES_UNPACK_EXTRA_2020[@]}"
+}
+
+install_zips() {
+       local x version=${1}
+       shift
 
-       local x
-       for x in "${PACKAGES_ZIP[@]}"; do
+       for x; do
                local cat=${x%/*}
                local pkg=${x#*/}
 
                insinto "/usr/share/nltk_data/${cat}"
-               newins "${DISTDIR}/nltk-${pkg}-${PV}.zip" "${pkg}.zip"
+               newins "${DISTDIR}/nltk-${pkg}-${version}.zip" "${pkg}.zip"
        done
 }
+
+src_install() {
+       dodir /usr/share/nltk_data
+       mv * "${ED}/usr/share/nltk_data/" || die
+
+       install_zips 20200312 "${PACKAGES_ZIP_2020[@]}"
+}

Reply via email to