Script 'mail_helper' called by obssrc Hello community, here is the log from the commit of package python-nltk for openSUSE:Factory checked in at 2026-02-19 14:24:54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/python-nltk (Old) and /work/SRC/openSUSE:Factory/.python-nltk.new.1977 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "python-nltk" Thu Feb 19 14:24:54 2026 rev:20 rq:1333907 version:3.9.1 Changes: -------- --- /work/SRC/openSUSE:Factory/python-nltk/python-nltk.changes 2024-10-29 14:37:34.727559473 +0100 +++ /work/SRC/openSUSE:Factory/.python-nltk.new.1977/python-nltk.changes 2026-02-19 14:25:26.159841038 +0100 @@ -1,0 +2,8 @@ +Thu Feb 19 10:18:32 UTC 2026 - Daniel Garcia <[email protected]> + +- bsc#1258436, Fix: Secure ZIP extraction, CVE-2025-14009 +- Add upstream patch CVE-2025-14009.patch, Secure ZIP extraction to + prevent Zip-Slip and RCE in nltk.downloader. + (gh#nltk/nltk#3468) + +------------------------------------------------------------------- New: ---- CVE-2025-14009.patch ----------(New B)---------- New:- bsc#1258436, Fix: Secure ZIP extraction, CVE-2025-14009 - Add upstream patch CVE-2025-14009.patch, Secure ZIP extraction to prevent Zip-Slip and RCE in nltk.downloader. ----------(New E)---------- ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ python-nltk.spec ++++++ --- /var/tmp/diff_new_pack.1z9w7H/_old 2026-02-19 14:25:29.143964845 +0100 +++ /var/tmp/diff_new_pack.1z9w7H/_new 2026-02-19 14:25:29.155965343 +0100 @@ -1,7 +1,7 @@ # # spec file for package python-nltk # -# Copyright (c) 2024 SUSE LLC +# Copyright (c) 2026 SUSE LLC and contributors # # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -65,6 +65,8 @@ ############################ # Source1: nltk_data.tar.xz Source99: python-nltk.rpmlintrc +# PATCH-FIX-UPSTREAM CVE-2025-14009.patch gh#nltk/nltk#3468 +Patch0: CVE-2025-14009.patch BuildRequires: %{python_module base >= 3.7} BuildRequires: %{python_module pip} BuildRequires: %{python_module setuptools} ++++++ CVE-2025-14009.patch ++++++ From 36c38b60a562f2ff8591386f35565c31ddcc569f Mon Sep 17 00:00:00 2001 From: hyperps <[email protected]> Date: Thu, 20 Nov 2025 06:41:45 +0530 
Subject: [PATCH 01/18] Fix: Secure ZIP extraction to prevent Zip-Slip and RCE in nltk.downloader --- nltk/downloader.py | 236 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 229 insertions(+), 7 deletions(-) Index: nltk-3.9.1/nltk/downloader.py =================================================================== --- nltk-3.9.1.orig/nltk/downloader.py +++ nltk-3.9.1/nltk/downloader.py @@ -2234,20 +2234,58 @@ def unzip(filename, root, verbose=True): def _unzip_iter(filename, root, verbose=True): + """ + Secure ZIP extraction with minimal behavioural changes. + + - Prevents classic Zip-Slip (.., absolute paths, drive letters) + - Prevents writes through pre-existing symlinks + - Preserves original extraction behaviour for valid archives + """ + if verbose: sys.stdout.write("Unzipping %s" % os.path.split(filename)[1]) sys.stdout.flush() try: zf = zipfile.ZipFile(filename) - except zipfile.BadZipFile: - yield ErrorMessage(filename, "Error with downloaded zip file") - return except Exception as e: yield ErrorMessage(filename, e) return - zf.extractall(root) + # Canonical root + root_abs = os.path.abspath(root) + root_real = os.path.realpath(root_abs) + root_prefix = root_real.rstrip(os.sep) + os.sep + + # Ensure the extraction root directory exists + os.makedirs(root, exist_ok=True) + + try: + for member in zf.namelist(): + + # Construct target path + raw_target = os.path.join(root_abs, member) + target_abs = os.path.abspath(raw_target) + + # Zip-Slip check (absolute/traversal/drive-letter cases) + if not target_abs.startswith(root_prefix): + yield ErrorMessage(filename, f"Zip Slip blocked: {member}") + continue + + # Symlink-escape check + target_real = os.path.realpath(target_abs) + if not target_real.startswith(root_prefix): + yield ErrorMessage(filename, f"Symlink escape blocked: {member}") + continue + + # Safe extraction + try: + zf.extract(member, root) + except Exception as e: + yield ErrorMessage(filename, f"Extraction error for {member}: {e}") + 
continue + finally: + zf.close() if verbose: print() Index: nltk-3.9.1/nltk/tag/mapping.py =================================================================== --- nltk-3.9.1.orig/nltk/tag/mapping.py +++ nltk-3.9.1/nltk/tag/mapping.py @@ -32,7 +32,7 @@ X - other: foreign words, typos, abbrevi from collections import defaultdict from os.path import join -from nltk.data import load +from nltk.data import load, normalize_resource_url _UNIVERSAL_DATA = "taggers/universal_tagset" _UNIVERSAL_TAGS = ( @@ -56,7 +56,10 @@ _MAPPINGS = defaultdict(lambda: defaultd def _load_universal_map(fileid): - contents = load(join(_UNIVERSAL_DATA, fileid + ".map"), format="text") + resource = normalize_resource_url( + f"nltk:{_UNIVERSAL_DATA.rstrip('/')}/{fileid.lstrip('/')}.map" + ) + contents = load(resource, format="text") # When mapping to the Universal Tagset, # map unknown inputs to 'X' not 'UNK' Index: nltk-3.9.1/nltk/test/unit/test_downloader_unzip.py =================================================================== --- /dev/null +++ nltk-3.9.1/nltk/test/unit/test_downloader_unzip.py @@ -0,0 +1,233 @@ +import os +import sys +import zipfile +from pathlib import Path + +import pytest + +from nltk.downloader import ErrorMessage, _unzip_iter + + +def _make_zip(file_path: Path, members: dict[str, bytes]) -> None: + """ + Create a ZIP file at file_path, with the given arcname->content mapping. + """ + with zipfile.ZipFile(file_path, "w") as zf: + for arcname, content in members.items(): + zf.writestr(arcname, content) + + +def _run_unzip_iter(zip_path: Path, extract_root: Path, verbose: bool = False): + """ + Convenience wrapper that runs _unzip_iter and returns the list of yielded + messages (if any). + """ + return list(_unzip_iter(str(zip_path), str(extract_root), verbose=verbose)) + + +class TestSecureUnzip: + """ + Tests for secure ZIP extraction behaviour in nltk.downloader._unzip_iter. 
+ + These tests are specifically designed so that: + + * On the old implementation (using zf.extractall(root) without checks), + the "Zip-Slip" tests will fail because they rely on the new behaviour + (yielding ErrorMessage for escaping entries, and NOT writing outside + the extraction root). + + * On the new implementation, the tests pass, because the new path + validation prevents escapes and emits the expected ErrorMessage. + """ + + def test_normal_relative_paths_are_extracted(self, tmp_path: Path) -> None: + """ + A ZIP with only safe, relative paths should fully extract under the + given root, and should not yield any ErrorMessage. + """ + zip_path = tmp_path / "safe.zip" + extract_root = tmp_path / "extract" + + members = { + "pkg/file.txt": b"hello", + "pkg/subdir/other.txt": b"world", + } + _make_zip(zip_path, members) + + messages = _run_unzip_iter(zip_path, extract_root, verbose=False) + + # No ErrorMessage should be yielded for valid archives. + assert not any(isinstance(m, ErrorMessage) for m in messages) + + assert (extract_root / "pkg" / "file.txt").read_bytes() == b"hello" + assert (extract_root / "pkg" / "subdir" / "other.txt").read_bytes() == b"world" + + def test_zip_slip_with_parent_directory_component_is_blocked( + self, tmp_path: Path + ) -> None: + """ + An entry containing ``..`` that would escape the target directory + must not be written outside the extraction root, and must cause + _unzip_iter to yield an ErrorMessage. + + On the old implementation (extractall(root)), this test will fail + because the file outside the root is actually created and no + ErrorMessage is yielded. + """ + zip_path = tmp_path / "zip_slip_parent.zip" + extract_root = tmp_path / "extract" + + # This is the path that would be written if "../outside.txt" is + # extracted with extractall(root). + outside_target = (extract_root / ".." / "outside.txt").resolve() + + members = { + "pkg/good.txt": b"ok", + # This would escape extract_root if not validated. 
+ "../outside.txt": b"evil", + } + _make_zip(zip_path, members) + + messages = _run_unzip_iter(zip_path, extract_root, verbose=False) + + # With the secure implementation, the escaping entry must be blocked + # and reported as an ErrorMessage. + err_msgs = [m for m in messages if isinstance(m, ErrorMessage)] + assert ( + err_msgs + ), "Expected an ErrorMessage for a Zip-Slip parent-directory attempt" + + # Check that the message text indicates it was blocked. Adjust to the + # exact wording if needed. + combined_messages = " ".join(str(m.message) for m in err_msgs) + assert "Zip Slip" in combined_messages and "blocked" in combined_messages + + # The escaping path must not have been written. + assert not outside_target.exists() + + # The safe entry should still be extracted under the root. + assert (extract_root / "pkg" / "good.txt").read_bytes() == b"ok" + + @pytest.mark.skipif( + sys.platform.startswith("win"), + reason="Absolute POSIX paths are not meaningful on Windows", + ) + def test_zip_slip_with_absolute_posix_path_is_blocked(self, tmp_path: Path) -> None: + """ + An entry with an absolute POSIX path (e.g. ``/tmp/evil``) must not be + extracted as-is; it should not overwrite arbitrary filesystem paths, + and should result in an ErrorMessage. + + On the old implementation (extractall(root)), this test will fail + because the absolute path gets created without any ErrorMessage. + """ + zip_path = tmp_path / "zip_slip_abs_posix.zip" + extract_root = tmp_path / "extract" + + # Choose a unique location in /tmp to avoid interfering with real files. + absolute_target = Path("/tmp") / f"nltk_zip_slip_test_{os.getpid()}" + + try: + members = { + "pkg/good.txt": b"ok", + # This absolute path should never be created by the secure + # implementation. 
+ str(absolute_target): b"evil", + } + _make_zip(zip_path, members) + + messages = _run_unzip_iter(zip_path, extract_root, verbose=False) + + err_msgs = [m for m in messages if isinstance(m, ErrorMessage)] + assert ( + err_msgs + ), "Expected an ErrorMessage for absolute-path Zip-Slip attempt" + + combined_messages = " ".join(str(m.message) for m in err_msgs) + assert "Zip Slip" in combined_messages and "blocked" in combined_messages + + # Absolute path must not be created by the secure implementation. + assert not absolute_target.exists() + + # Safe entry must still be extracted under extract_root. + assert (extract_root / "pkg" / "good.txt").read_bytes() == b"ok" + finally: + # Best-effort cleanup if the implementation under test behaves + # incorrectly and creates the file. + if absolute_target.exists(): + try: + absolute_target.unlink() + except OSError: + # Ignore cleanup errors: file may not exist or be locked. + pass + + def test_entries_resolved_outside_root_are_blocked_via_symlink( + self, tmp_path: Path + ) -> None: + """ + If there is a pre-existing symlink below the extraction root that + points outside the root, writing through that symlink should not + be allowed to escape the root. + + This test documents the desired hardening to defend against this + class of attacks. + """ + if not hasattr(os, "symlink"): + pytest.skip("Symlinks not supported on this platform") + + zip_path = tmp_path / "zip_slip_symlink.zip" + extract_root = tmp_path / "extract" + outside_dir = tmp_path / "outside_dir" + outside_dir.mkdir() + outside_target = outside_dir / "evil.txt" + + members = { + "pkg/good.txt": b"ok", + "dir_link/evil.txt": b"evil", + } + _make_zip(zip_path, members) + + extract_root.mkdir() + # Pre-existing symlink inside extract_root pointing *outside* it. 
+ os.symlink(outside_dir, extract_root / "dir_link") + + messages = _run_unzip_iter(zip_path, extract_root, verbose=False) + + # Desired property: + assert not outside_target.exists() + assert (extract_root / "pkg" / "good.txt").read_bytes() == b"ok" + # Should yield an ErrorMessage for the blocked symlink escape + assert any(isinstance(m, ErrorMessage) for m in messages) + + def test_bad_zipfile_yields_errormessage(self, tmp_path: Path) -> None: + """ + A corrupt or non-zip file should cause _unzip_iter to yield an + ErrorMessage instead of raising an unhandled exception. + """ + zip_path = tmp_path / "not_a_zip.txt" + zip_path.write_bytes(b"this is not a zip archive") + extract_root = tmp_path / "extract" + + messages = _run_unzip_iter(zip_path, extract_root, verbose=False) + + assert any(isinstance(m, ErrorMessage) for m in messages) + + # If the implementation chooses to create the root directory at all, + # it should not leave partially extracted content. + if extract_root.exists(): + assert not any(extract_root.iterdir()) + + def test_unzip_iter_verbose_writes_to_stdout(self, capsys, tmp_path: Path) -> None: + """ + When verbose=True, _unzip_iter should write a status line to stdout. + This checks that existing user-visible behaviour is preserved. + """ + zip_path = tmp_path / "verbose.zip" + extract_root = tmp_path / "extract" + + members = {"pkg/file.txt": b"data"} + _make_zip(zip_path, members) + + _run_unzip_iter(zip_path, extract_root, verbose=True) + captured = capsys.readouterr() + assert "Unzipping" in captured.out
