Script 'mail_helper' called by obssrc
Hello community,

here is the log from the commit of package python-unearth for openSUSE:Factory checked in at 2023-10-23 23:41:08
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/python-unearth (Old)
and /work/SRC/openSUSE:Factory/.python-unearth.new.1945 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "python-unearth" Mon Oct 23 23:41:08 2023 rev:5 rq:1119610 version:0.12.0 Changes: -------- --- /work/SRC/openSUSE:Factory/python-unearth/python-unearth.changes 2023-06-16 16:56:11.890186688 +0200 +++ /work/SRC/openSUSE:Factory/.python-unearth.new.1945/python-unearth.changes 2023-10-23 23:41:15.172156231 +0200 @@ -1,0 +2,21 @@ +Mon Oct 23 11:10:27 UTC 2023 - Dirk Müller <dmuel...@suse.com> + +- update to 0.12.0 (bsc#1216480, CVE-2023-45805): + * Add callback to report download status + * Respect :all: in prefer_binary config + * security: Validate the package name extracted from the part + before the last hyphen + * Also fallback on "token" username for KeyringCliProvider + * Revert the handling of 403 and 404 + * Handle 403 same as 401, and look for credentials on 404 error + * Close response files correctly + * Change FormatControl to respect the priority of only_binary + over no_binary + * Include useful message if VCS command not found + * Postpone the validation of hashes + * Migrate from deprecated pdm.pep517 to pdm.backend + * Tolerate invalid python requires + * Fix the dist info link evaluation + * PEP 714: rename the dist-info-metadata field to core-metadata + +------------------------------------------------------------------- Old: ---- unearth-0.9.1.tar.gz New: ---- unearth-0.12.0.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ python-unearth.spec ++++++ --- /var/tmp/diff_new_pack.YwjF7Z/_old 2023-10-23 23:41:15.916183241 +0200 +++ /var/tmp/diff_new_pack.YwjF7Z/_new 2023-10-23 23:41:15.916183241 +0200 @@ -18,7 +18,7 @@ %{?sle15_python_module_pythons} Name: python-unearth -Version: 0.9.1 +Version: 0.12.0 Release: 0 Summary: A utility to fetch and download python packages License: MIT @@ -27,7 +27,7 @@ BuildRequires: %{python_module base >= 3.7} BuildRequires: %{python_module cached-property >= 1.5.2 if %python-base < 3.8} BuildRequires: %{python_module packaging >= 
20} -BuildRequires: %{python_module pdm-pep517} +BuildRequires: %{python_module pdm-backend} BuildRequires: %{python_module pip} BuildRequires: %{python_module requests >= 2.25} BuildRequires: fdupes ++++++ unearth-0.9.1.tar.gz -> unearth-0.12.0.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/unearth-0.9.1/PKG-INFO new/unearth-0.12.0/PKG-INFO --- old/unearth-0.9.1/PKG-INFO 1970-01-01 01:00:00.000000000 +0100 +++ new/unearth-0.12.0/PKG-INFO 1970-01-01 01:00:00.000000000 +0100 @@ -1,23 +1,27 @@ Metadata-Version: 2.1 Name: unearth -Version: 0.9.1 +Version: 0.12.0 Summary: A utility to fetch and download python packages +Author-Email: Frost Ming <m...@frostming.com> License: MIT -Author-email: Frost Ming <m...@frostming.com> -Requires-Python: >=3.7 Classifier: Development Status :: 3 - Alpha Classifier: Intended Audience :: Developers Classifier: Programming Language :: Python :: 3 -Classifier: Programming Language :: Python :: 3 :: Only -Classifier: Programming Language :: Python :: 3.10 -Classifier: Programming Language :: Python :: 3.11 Classifier: Programming Language :: Python :: 3.7 Classifier: Programming Language :: Python :: 3.8 Classifier: Programming Language :: Python :: 3.9 -Provides-Extra: keyring -Project-URL: Changelog, https://github.com/frostming/unearth/releases -Project-URL: Documentation, https://unearth.readthedocs.io +Classifier: Programming Language :: Python :: 3.10 +Classifier: Programming Language :: Python :: 3.11 +Classifier: Programming Language :: Python :: 3 :: Only Project-URL: Homepage, https://github.com/frostming/unearth +Project-URL: Documentation, https://unearth.readthedocs.io +Project-URL: Changelog, https://github.com/frostming/unearth/releases +Requires-Python: >=3.7 +Requires-Dist: packaging>=20 +Requires-Dist: requests>=2.25 +Requires-Dist: cached-property>=1.5.2; python_version < "3.8" +Requires-Dist: keyring; extra == "keyring" +Provides-Extra: keyring 
Description-Content-Type: text/markdown # unearth @@ -92,4 +96,3 @@ ## Documentation [Read the docs](https://unearth.readthedocs.io/en/latest/) - diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/unearth-0.9.1/pyproject.toml new/unearth-0.12.0/pyproject.toml --- old/unearth-0.9.1/pyproject.toml 2023-05-15 09:34:42.539511400 +0200 +++ new/unearth-0.12.0/pyproject.toml 2023-10-20 09:21:05.348673800 +0200 @@ -1,8 +1,8 @@ [build-system] requires = [ - "pdm-pep517", + "pdm-backend", ] -build-backend = "pdm.pep517.api" +build-backend = "pdm.backend" [project] name = "unearth" @@ -29,7 +29,7 @@ "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3 :: Only", ] -version = "0.9.1" +version = "0.12.0" [project.license] text = "MIT" diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/unearth-0.9.1/src/unearth/auth.py new/unearth-0.12.0/src/unearth/auth.py --- old/unearth-0.9.1/src/unearth/auth.py 2023-05-15 09:34:42.539511400 +0200 +++ new/unearth-0.12.0/src/unearth/auth.py 2023-10-20 09:20:50.556266500 +0200 @@ -71,11 +71,12 @@ self.keyring = cmd def get_auth_info(self, url: str, username: str | None) -> AuthInfo | None: - if username is not None: - logger.debug("Getting password from keyring CLI for %s@%s", username, url) - password = self._get_password(url, username) - if password is not None: - return username, password + if username is None: + username = "__token__" + logger.debug("Getting password from keyring CLI for %s@%s", username, url) + password = self._get_password(url, username) + if password is not None: + return username, password return None def save_auth_info(self, url: str, username: str, password: str) -> None: @@ -175,7 +176,7 @@ self, original_url: str, *, - allow_netrc: bool = False, + allow_netrc: bool = True, allow_keyring: bool = False, ) -> tuple[str | None, str | None]: """Find and return credentials for the specified URL.""" @@ -235,7 +236,9 
@@ _, url = split_auth_from_url(original_url) netloc = urlparse(url).netloc # Try to get credentials from original url - username, password = self._get_new_credentials(original_url) + username, password = self._get_new_credentials( + original_url, allow_netrc=True, allow_keyring=False + ) # If credentials not found, use any stored credentials for this netloc. # Do this if either the username or the password is missing. @@ -285,27 +288,27 @@ return input("Save credentials to keyring [y/N]: ") == "y" def handle_401(self, resp: Response, **kwargs: Any) -> Response: - # We only care about 401 responses, anything else we want to just + # We only care about 401 response, anything else we want to just # pass through the actual response if resp.status_code != 401: return resp - # We are not able to prompt the user so simply return the response - if not self.prompting: - return resp - parsed = urlparse(cast(str, resp.url)) # Query the keyring for credentials: username, password = self._get_new_credentials( resp.url, - allow_netrc=True, + allow_netrc=False, allow_keyring=True, ) # Prompt the user for a new username and password save = False if not username and not password: + # We are not able to prompt the user so simply return the response + if not self.prompting: + return resp + username, password, save = self._prompt_for_password(parsed.netloc) # Store the new username and password to use for future requests @@ -344,7 +347,8 @@ """Response callback to warn about incorrect credentials.""" if resp.status_code == 401: logger.warning( - "401 Error, Credentials not correct for %s", + "%s Error, Credentials not correct for %s", + resp.status_code, resp.request.url, ) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/unearth-0.9.1/src/unearth/collector.py new/unearth-0.12.0/src/unearth/collector.py --- old/unearth-0.9.1/src/unearth/collector.py 2023-05-15 09:34:42.539511400 +0200 +++ new/unearth-0.12.0/src/unearth/collector.py 
2023-10-20 09:20:50.556266500 +0200 @@ -61,10 +61,12 @@ url = parse.urljoin(base_url, href) requires_python = anchor.get("data-requires-python") yank_reason = anchor.get("data-yanked") - data_dist_info_metadata = anchor.get("data-dist-info-metadata") + metadata_hash = anchor.get( + "data-core-metadata", anchor.get("data-dist-info-metadata") + ) dist_info_metadata: bool | dict[str, str] | None = None - if data_dist_info_metadata: - hash_name, has_hash, hash_value = data_dist_info_metadata.partition("=") + if metadata_hash: + hash_name, has_hash, hash_value = metadata_hash.partition("=") if has_hash: dist_info_metadata = {hash_name: hash_value} else: @@ -90,7 +92,7 @@ requires_python: str | None = file.get("requires-python") yank_reason: str | None = file.get("yanked") or None dist_info_metadata: bool | dict[str, str] | None = file.get( - "data-dist-info-metadata" + "core-metadata", file.get("data-dist-info-metadata") ) hashes: dict[str, str] | None = file.get("hashes") yield Link( diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/unearth-0.9.1/src/unearth/evaluator.py new/unearth-0.12.0/src/unearth/evaluator.py --- old/unearth-0.9.1/src/unearth/evaluator.py 2023-05-15 09:34:42.539511400 +0200 +++ new/unearth-0.12.0/src/unearth/evaluator.py 2023-10-20 09:20:50.556266500 +0200 @@ -4,15 +4,16 @@ import dataclasses as dc import hashlib import logging +import os import sys from typing import Any -from urllib.parse import urlencode import packaging.requirements from packaging.specifiers import InvalidSpecifier, SpecifierSet from packaging.tags import Tag from packaging.utils import ( InvalidWheelFilename, + NormalizedName, canonicalize_name, parse_wheel_filename, ) @@ -21,7 +22,12 @@ from unearth.link import Link from unearth.pep425tags import get_supported -from unearth.utils import ARCHIVE_EXTENSIONS, splitext, strip_extras +from unearth.utils import ( + ARCHIVE_EXTENSIONS, + fix_legacy_specifier, + splitext, + strip_extras, +) 
logger = logging.getLogger(__name__) @@ -98,23 +104,29 @@ @dc.dataclass(frozen=True) class FormatControl: - only_binary: bool = False - no_binary: bool = False + only_binary: set[NormalizedName] = dc.field(default_factory=set) + no_binary: set[NormalizedName] = dc.field(default_factory=set) - def __post_init__(self): - if self.only_binary and self.no_binary: - raise ValueError( - "Not allowed to set only_binary and no_binary at the same time." - ) + def get_allowed_formats(self, canonical_name: NormalizedName) -> set[str]: + allowed_formats = {"binary", "source"} + if canonical_name in self.only_binary: + allowed_formats.discard("source") + elif canonical_name in self.no_binary: + allowed_formats.discard("binary") + elif ":all:" in self.only_binary: + allowed_formats.discard("source") + elif ":all:" in self.no_binary: + allowed_formats.discard("binary") + return allowed_formats def check_format(self, link: Link, project_name: str) -> None: - if self.only_binary: - if not link.is_wheel: - raise LinkMismatchError(f"only binaries are allowed for {project_name}") - if self.no_binary: - if link.is_wheel: - raise LinkMismatchError(f"no binary is allowed for {project_name}") - return + allowed_formats = self.get_allowed_formats(canonicalize_name(project_name)) + if link.is_wheel and "binary" not in allowed_formats: + raise LinkMismatchError(f"binary wheel is not allowed for {project_name}") + if not link.is_wheel and "source" not in allowed_formats: + raise LinkMismatchError( + f"source distribution is not allowed for {project_name}" + ) @dc.dataclass @@ -124,16 +136,13 @@ Args: package_name (str): The links must match the package name target_python (TargetPython): The links must match the target Python - hashes (dict[str, list[str]): The links must have the correct hashes ignore_compatibility (bool): Whether to ignore the compatibility check allow_yanked (bool): Whether to allow yanked candidates format_control (bool): Format control flags """ package_name: str - 
session: Session target_python: TargetPython = dc.field(default_factory=TargetPython) - hashes: dict[str, list[str]] = dc.field(default_factory=dict) ignore_compatibility: bool = False allow_yanked: bool = False format_control: FormatControl = dc.field(default_factory=FormatControl) @@ -141,17 +150,19 @@ def __post_init__(self) -> None: self._canonical_name = canonicalize_name(self.package_name) - def _check_yanked(self, link: Link) -> None: + def check_yanked(self, link: Link) -> None: if link.yank_reason is not None and not self.allow_yanked: yank_reason = f"due to {link.yank_reason}" if link.yank_reason else "" raise LinkMismatchError(f"Yanked {yank_reason}") - def _check_requires_python(self, link: Link) -> None: + def check_requires_python(self, link: Link) -> None: if not self.ignore_compatibility and link.requires_python: py_ver = self.target_python.py_ver or sys.version_info[:2] py_version = ".".join(str(v) for v in py_ver) try: - requires_python = SpecifierSet(link.requires_python) + requires_python = SpecifierSet( + fix_legacy_specifier(link.requires_python) + ) except InvalidSpecifier: raise LinkMismatchError( f"Invalid requires-python: {link.requires_python}" @@ -164,54 +175,14 @@ ), ) - def _check_hashes(self, link: Link) -> None: - def hash_mismatch( - hash_name: str, given_hash: str, allowed_hashes: list[str] - ) -> None: - raise LinkMismatchError( - f"Hash mismatch, expected: {allowed_hashes}\n" - f"got: {hash_name}:{given_hash}" - ) - - if not self.hashes: - return - link_hashes = link.hash_option - if link_hashes: - for hash_name, allowed_hashes in self.hashes.items(): - if hash_name in link_hashes: - given_hash = link_hashes[hash_name][0] - if given_hash not in allowed_hashes: - hash_mismatch(hash_name, given_hash, allowed_hashes) - return - - hash_name, allowed_hashes = next(iter(self.hashes.items())) - given_hash = self._get_hash(link, hash_name) - if given_hash not in allowed_hashes: - hash_mismatch(hash_name, given_hash, allowed_hashes) - - 
def _get_hash(self, link: Link, hash_name: str) -> str: - resp = self.session.get(link.normalized, stream=True) - hasher = hashlib.new(hash_name) - for chunk in resp.iter_content(chunk_size=1024 * 8): - hasher.update(chunk) - digest = hasher.hexdigest() - # Store the hash on the link for future use - fragment_dict = link._fragment_dict - fragment_dict.pop(link.hash_name, None) # type: ignore - fragment_dict[hash_name] = digest - link.__dict__["parsed"] = link.parsed._replace( - fragment=urlencode(fragment_dict) - ) - return digest - def evaluate_link(self, link: Link) -> Package | None: """ Evaluate the link and return the package if it matches or None if it doesn't. """ try: self.format_control.check_format(link, self.package_name) - self._check_yanked(link) - self._check_requires_python(link) + self.check_yanked(link) + self.check_requires_python(link) version: str | None = None if link.is_wheel: try: @@ -242,18 +213,37 @@ raise LinkMismatchError( f"Unsupported archive format: {link.filename}" ) - version = parse_version_from_egg_info(egg_info, self._canonical_name) - if version is None: - raise LinkMismatchError( - f"Missing version in the filename {egg_info}" + LOOSE_FILENAME = os.getenv( + "UNEARTH_LOOSE_FILENAME", "false" + ).lower() in ("1", "true") + if LOOSE_FILENAME: + version = parse_version_from_egg_info( + egg_info, self._canonical_name ) + if version is None: + raise LinkMismatchError( + f"Missing version in the filename: {egg_info}" + ) + else: + # For source releases, we know that the version should not contain + # any hyphens, so we can split on the last hyphen to get the + # project name. 
+ filename_prefix, has_version, version = egg_info.rpartition("-") + if not has_version: + raise LinkMismatchError( + f"Missing version in the filename: {egg_info}" + ) + if canonicalize_name(filename_prefix) != self._canonical_name: + raise LinkMismatchError( + f"The package name doesn't match {egg_info}, set env var " + "UNEARTH_LOOSE_FILENAME=1 to allow legacy filename." + ) try: Version(version) except InvalidVersion: raise LinkMismatchError( f"Invalid version in the filename {egg_info}: {version}" ) - self._check_hashes(link) except LinkMismatchError as e: logger.debug("Skipping link %s: %s", link, e) return None @@ -292,3 +282,35 @@ ) return False return True + + +def _get_hash(link: Link, hash_name: str, session: Session) -> str: + hasher = hashlib.new(hash_name) + with session.get(link.normalized, stream=True) as resp: + for chunk in resp.iter_content(chunk_size=1024 * 8): + hasher.update(chunk) + digest = hasher.hexdigest() + if not link.hashes: + link.hashes = {} + link.hashes[hash_name] = digest + return digest + + +def validate_hashes( + package: Package, hashes: dict[str, list[str]], session: Session +) -> bool: + if not hashes: + return True + link = package.link + link_hashes = link.hash_option + if link_hashes: + for hash_name, allowed_hashes in hashes.items(): + if hash_name in link_hashes: + given_hash = link_hashes[hash_name][0] + if given_hash not in allowed_hashes: + return False + return True + + hash_name, allowed_hashes = next(iter(hashes.items())) + given_hash = _get_hash(link, hash_name, session) + return given_hash in allowed_hashes diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/unearth-0.9.1/src/unearth/finder.py new/unearth-0.12.0/src/unearth/finder.py --- old/unearth-0.9.1/src/unearth/finder.py 2023-05-15 09:34:42.539511400 +0200 +++ new/unearth-0.12.0/src/unearth/finder.py 2023-10-20 09:20:50.556266500 +0200 @@ -6,6 +6,7 @@ import itertools import os import pathlib +import warnings from 
tempfile import TemporaryDirectory from typing import TYPE_CHECKING, Iterable, NamedTuple, Sequence from urllib.parse import urljoin @@ -22,15 +23,18 @@ TargetPython, evaluate_package, is_equality_specifier, + validate_hashes, ) from unearth.link import Link -from unearth.preparer import unpack_link +from unearth.preparer import noop_download_reporter, noop_unpack_reporter, unpack_link from unearth.session import PyPISession from unearth.utils import LazySequence if TYPE_CHECKING: from typing import TypedDict + from unearth.preparer import DownloadReporter, UnpackReporter + class Source(TypedDict): url: str type: str @@ -93,9 +97,9 @@ self.add_find_links(url) self.target_python = target_python or TargetPython() self.ignore_compatibility = ignore_compatibility - self.no_binary = [canonicalize_name(name) for name in no_binary] - self.only_binary = [canonicalize_name(name) for name in only_binary] - self.prefer_binary = [canonicalize_name(name) for name in prefer_binary] + self.no_binary = {canonicalize_name(name) for name in no_binary} + self.only_binary = {canonicalize_name(name) for name in only_binary} + self.prefer_binary = {canonicalize_name(name) for name in prefer_binary} self.trusted_hosts = trusted_hosts self._session = session self.respect_source_order = respect_source_order @@ -145,26 +149,25 @@ Args: package_name (str): The desired package name allow_yanked (bool): Whether to allow yanked candidates. - hashes (dict[str, list[str]]|None): The hashes to filter on. 
Returns: Evaluator: The evaluator for the given package name """ - if hashes: - hashes = {name: sorted(values) for name, values in hashes.items()} - canonical_name = canonicalize_name(package_name) + if hashes is not None: + warnings.warn( + "The evaluator no longer validates hashes, " + "please remove the hashes argument", + FutureWarning, + stacklevel=2, + ) format_control = FormatControl( - no_binary=canonical_name in self.no_binary or ":all:" in self.no_binary, - only_binary=canonical_name in self.only_binary - or ":all:" in self.only_binary, + no_binary=self.no_binary, only_binary=self.only_binary ) return Evaluator( package_name=package_name, - session=self.session, target_python=self.target_python, ignore_compatibility=self.ignore_compatibility, allow_yanked=allow_yanked, - hashes=hashes or {}, format_control=format_control, ) @@ -198,6 +201,14 @@ ) return filter(evaluator, packages) + def _evaluate_hashes( + self, packages: Iterable[Package], hashes: dict[str, list[str]] + ) -> Iterable[Package]: + evaluator = functools.partial( + validate_hashes, hashes=hashes, session=self.session + ) + return filter(evaluator, packages) + def _sort_key(self, package: Package) -> tuple: """The key for sort, package with the largest value is the most preferred.""" link = package.link @@ -210,7 +221,10 @@ (self._tag_priorities.get(tag, pri - 1) for tag in file_tags), default=pri - 1, ) - if canonicalize_name(package.name) in self.prefer_binary: + if ( + canonicalize_name(package.name) in self.prefer_binary + or ":all:" in self.prefer_binary + ): prefer_binary = True return ( @@ -225,19 +239,17 @@ self, package_name: str, allow_yanked: bool = False, - hashes: dict[str, list[str]] | None = None, ) -> Iterable[Package]: """Find all packages with the given name. Args: package_name (str): The desired package name allow_yanked (bool): Whether to allow yanked candidates. - hashes (dict[str, list[str]]|None): The hashes to filter on. 
Returns: Iterable[Package]: The packages with the given name, sorted by best match. """ - evaluator = self.build_evaluator(package_name, allow_yanked, hashes) + evaluator = self.build_evaluator(package_name, allow_yanked) def find_one_source(source: Source) -> Iterable[Package]: if source["type"] == "index": @@ -278,20 +290,23 @@ Returns: Sequence[Package]: The packages list sorted by best match """ - return LazySequence(self._find_packages(package_name, allow_yanked, hashes)) + return LazySequence( + self._evaluate_hashes( + self._find_packages(package_name, allow_yanked), hashes=hashes or {} + ) + ) def _find_packages_from_requirement( self, requirement: packaging.requirements.Requirement, allow_yanked: bool | None = None, - hashes: dict[str, list[str]] | None = None, ) -> Iterable[Package]: if allow_yanked is None: allow_yanked = is_equality_specifier(requirement.specifier) if requirement.url: yield Package(requirement.name, None, link=Link(requirement.url)) else: - yield from self._find_packages(requirement.name, allow_yanked, hashes) + yield from self._find_packages(requirement.name, allow_yanked) def find_matches( self, @@ -317,10 +332,13 @@ if isinstance(requirement, str): requirement = packaging.requirements.Requirement(requirement) return LazySequence( - self._evaluate_packages( - self._find_packages_from_requirement(requirement, allow_yanked, hashes), - requirement, - allow_prereleases, + self._evaluate_hashes( + self._evaluate_packages( + self._find_packages_from_requirement(requirement, allow_yanked), + requirement, + allow_prereleases, + ), + hashes=hashes or {}, ) ) @@ -347,12 +365,13 @@ """ if isinstance(requirement, str): requirement = packaging.requirements.Requirement(requirement) - packages = self._find_packages_from_requirement( - requirement, allow_yanked, hashes - ) + packages = self._find_packages_from_requirement(requirement, allow_yanked) candidates = LazySequence(packages) applicable_candidates = LazySequence( - 
self._evaluate_packages(packages, requirement, allow_prereleases) + self._evaluate_hashes( + self._evaluate_packages(packages, requirement, allow_prereleases), + hashes=hashes or {}, + ) ) best_match = next(iter(applicable_candidates), None) return BestMatch(best_match, applicable_candidates, candidates) @@ -363,6 +382,8 @@ location: str | pathlib.Path, download_dir: str | pathlib.Path | None = None, hashes: dict[str, list[str]] | None = None, + download_reporter: DownloadReporter = noop_download_reporter, + unpack_reporter: UnpackReporter = noop_unpack_reporter, ) -> pathlib.Path: """Download and unpack the package at the given link. @@ -379,21 +400,32 @@ download_dir: The directory to download to, or None to use a temporary directory created by unearth. hashes (dict[str, list[str]]|None): The optional hash dict for validation. + download_reporter (DownloadReporter): The download reporter for progress + reporting. By default, it does nothing. + unpack_reporter (UnpackReporter): The unpack reporter for progress + reporting. By default, it does nothing. Returns: The path to the installable file or directory. 
""" - # Strip the rev part for VCS links + import contextlib + if hashes is None: hashes = link.hash_option - if download_dir is None: - download_dir = TemporaryDirectory(prefix="unearth-download-").name - file = unpack_link( - self.session, - link, - pathlib.Path(download_dir), - pathlib.Path(location), - hashes, - verbosity=self.verbosity, - ) + + with contextlib.ExitStack() as stack: + if download_dir is None: + download_dir = stack.enter_context( + TemporaryDirectory(prefix="unearth-download-") + ) + file = unpack_link( + self.session, + link, + pathlib.Path(download_dir), + pathlib.Path(location), + hashes, + verbosity=self.verbosity, + download_reporter=download_reporter, + unpack_reporter=unpack_reporter, + ) return file.joinpath(link.subdirectory) if link.subdirectory else file diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/unearth-0.9.1/src/unearth/link.py new/unearth-0.12.0/src/unearth/link.py --- old/unearth-0.9.1/src/unearth/link.py 2023-05-15 09:34:42.539511400 +0200 +++ new/unearth-0.12.0/src/unearth/link.py 2023-10-20 09:20:50.556266500 +0200 @@ -111,7 +111,7 @@ def dist_info_link(self) -> Link | None: return ( type(self)(f"{self.url_without_fragment}.metadata", self.comes_from) - if self.dist_info_metadata is not None + if self.dist_info_metadata else None ) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/unearth-0.9.1/src/unearth/preparer.py new/unearth-0.12.0/src/unearth/preparer.py --- old/unearth-0.9.1/src/unearth/preparer.py 2023-05-15 09:34:42.539511400 +0200 +++ new/unearth-0.12.0/src/unearth/preparer.py 2023-10-20 09:20:50.560266700 +0200 @@ -1,6 +1,7 @@ """Unpack the link to an installed wheel or source.""" from __future__ import annotations +import functools import hashlib import logging import mimetypes @@ -10,7 +11,7 @@ import tarfile import zipfile from pathlib import Path -from typing import Iterable, cast +from typing import TYPE_CHECKING, 
Iterable, cast from requests import HTTPError, Session @@ -23,9 +24,30 @@ ZIP_EXTENSIONS, display_path, format_size, + iter_with_callback, ) from unearth.vcs import vcs_support +if TYPE_CHECKING: + from typing import Protocol + + class DownloadReporter(Protocol): + def __call__(self, link: Link, completed: int, total: int | None) -> None: + ... + + class UnpackReporter(Protocol): + def __call__(self, filename: Path, completed: int, total: int | None) -> None: + ... + + +def noop_download_reporter(link: Link, completed: int, total: int | None) -> None: + pass + + +def noop_unpack_reporter(filename: Path, completed: int, total: int | None) -> None: + pass + + READ_CHUNK_SIZE = 8192 logger = logging.getLogger(__name__) @@ -133,30 +155,33 @@ return True -def unpack_archive(archive: Path, dest: Path) -> None: +def unpack_archive( + archive: Path, dest: Path, reporter: UnpackReporter = noop_unpack_reporter +) -> None: content_type = mimetypes.guess_type(str(archive))[0] if ( content_type == "application/zip" or zipfile.is_zipfile(archive) or archive.suffix.lower() in ZIP_EXTENSIONS ): - _unzip_archive(archive, dest) + _unzip_archive(archive, dest, reporter=reporter) elif ( content_type == "application/x-gzip" or tarfile.is_tarfile(archive) or archive.suffix.lower() in (TAR_EXTENSIONS + XZ_EXTENSIONS + BZ2_EXTENSIONS) ): - _untar_archive(archive, dest) + _untar_archive(archive, dest, reporter=reporter) else: raise UnpackError(f"Unknown archive type: {archive.name}") -def _unzip_archive(filename: Path, location: Path) -> None: +def _unzip_archive(filename: Path, location: Path, reporter: UnpackReporter) -> None: os.makedirs(location, exist_ok=True) zipfp = open(filename, "rb") with zipfile.ZipFile(zipfp, allowZip64=True) as zip: leading = has_leading_dir(zip.namelist()) - for info in zip.infolist(): + callback = functools.partial(reporter, filename, total=len(zip.infolist())) + for info in iter_with_callback(zip.infolist(), callback): name = info.filename fn = name if 
leading: @@ -183,7 +208,7 @@ set_extracted_file_to_default_mode_plus_executable(fn) -def _untar_archive(filename: Path, location: Path) -> None: +def _untar_archive(filename: Path, location: Path, reporter: UnpackReporter) -> None: """Untar the file (with path `filename`) to the destination `location`.""" os.makedirs(location, exist_ok=True) lower_fn = str(filename).lower() @@ -203,7 +228,8 @@ mode = "r:*" with tarfile.open(filename, mode, encoding="utf-8") as tar: leading = has_leading_dir([member.name for member in tar.getmembers()]) - for member in tar.getmembers(): + callback = functools.partial(reporter, filename, total=len(tar.getmembers())) + for member in iter_with_callback(tar.getmembers(), callback): fn = member.name if leading: fn = split_leading_dir(fn)[1] @@ -261,6 +287,8 @@ location: Path, hashes: dict[str, list[str]] | None = None, verbosity: int = 0, + download_reporter: DownloadReporter = noop_download_reporter, + unpack_reporter: UnpackReporter = noop_unpack_reporter, ) -> Path: """Unpack link into location. 
@@ -297,21 +325,30 @@ # A remote artfiact link, check the download dir first artifact = download_dir / link.filename if not _check_downloaded(artifact, hashes): - resp = session.get(link.normalized, stream=True) - try: - resp.raise_for_status() - except HTTPError as e: - raise UnpackError(f"Download failed: {e}") from None - if getattr(resp, "from_cache", False): - logger.info("Using cached %s", link) - else: - size = format_size(resp.headers.get("Content-Length", "")) - logger.info("Downloading %s (%s)", link, size) - with artifact.open("wb") as f: - for chunk in resp.iter_content(chunk_size=READ_CHUNK_SIZE): - if chunk: - validator.update(chunk) - f.write(chunk) + with session.get(link.normalized, stream=True) as resp: + try: + resp.raise_for_status() + except HTTPError as e: + raise UnpackError(f"Download failed: {e}") from None + try: + total = int(resp.headers["Content-Length"]) + except (KeyError, ValueError, TypeError): + total = None + if getattr(resp, "from_cache", False): + logger.info("Using cached %s", link) + else: + size = format_size(resp.headers.get("Content-Length", "")) + logger.info("Downloading %s (%s)", link, size) + with artifact.open("wb") as f: + callback = functools.partial(download_reporter, link, total=total) + for chunk in iter_with_callback( + resp.iter_content(chunk_size=READ_CHUNK_SIZE), + callback, + stepper=len, + ): + if chunk: + validator.update(chunk) + f.write(chunk) validator.validate() if link.is_wheel: if link.is_file: @@ -323,5 +360,5 @@ os.replace(artifact, target_file) return target_file - unpack_archive(artifact, location) + unpack_archive(artifact, location, reporter=unpack_reporter) return location diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/unearth-0.9.1/src/unearth/utils.py new/unearth-0.12.0/src/unearth/utils.py --- old/unearth-0.9.1/src/unearth/utils.py 2023-05-15 09:34:42.539511400 +0200 +++ new/unearth-0.12.0/src/unearth/utils.py 2023-10-20 09:20:50.560266700 +0200 
@@ -4,10 +4,12 @@ import functools import itertools import os +import re import sys import urllib.parse as parse +import warnings from pathlib import Path -from typing import Iterable, Iterator, Sequence, TypeVar +from typing import Callable, Iterable, Iterator, Sequence, TypeVar from urllib.request import pathname2url, url2pathname WINDOWS = sys.platform == "win32" @@ -186,7 +188,7 @@ return f"{int(int_size)} bytes" -T = TypeVar("T", covariant=True) +T = TypeVar("T") class LazySequence(Sequence[T]): @@ -217,3 +219,53 @@ if i == index: return item raise IndexError("Index out of range") + + +_legacy_specifier_re = re.compile(r"(==|!=|<=|>=|<|>)(\s*)([^,;\s)]*)") + + +@functools.lru_cache() +def fix_legacy_specifier(specifier: str) -> str: + """Since packaging 22.0, legacy specifiers like '>=4.*' are no longer + supported. We try to normalize them to the new format. + """ + + def fix_wildcard(match: re.Match[str]) -> str: + operator, _, version = match.groups() + if operator in ("==", "!="): + return match.group(0) + if ".*" in version: + warnings.warn( + ".* suffix can only be used with `==` or `!=` operators", + FutureWarning, + stacklevel=4, + ) + version = version.replace(".*", ".0") + if operator in ("<", "<="): # <4.* and <=4.* are equivalent to <4.0 + operator = "<" + elif operator in (">", ">="): # >4.* and >=4.* are equivalent to >=4.0 + operator = ">=" + elif "+" in version: # Drop the local version + warnings.warn( + "Local version label can only be used with `==` or `!=` operators", + FutureWarning, + stacklevel=4, + ) + version = version.split("+")[0] + return f"{operator}{version}" + + return _legacy_specifier_re.sub(fix_wildcard, specifier) + + +def iter_with_callback( + iterable: Iterable[T], + callback: Callable[[int], None], + stepper: Callable[[T], int] = lambda _: 1, +) -> Iterator[T]: + completed = 0 + for item in iterable: + try: + yield item + finally: + completed += stepper(item) + callback(completed) diff -urN '--exclude=CVS' 
'--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/unearth-0.9.1/src/unearth/vcs/base.py new/unearth-0.12.0/src/unearth/vcs/base.py --- old/unearth-0.9.1/src/unearth/vcs/base.py 2023-05-15 09:34:42.539511400 +0200 +++ new/unearth-0.12.0/src/unearth/vcs/base.py 2023-10-20 09:20:50.560266700 +0200 @@ -76,6 +76,13 @@ logger.debug(e.stdout.rstrip()) return subprocess.CompletedProcess(e.args, e.returncode, e.stdout) raise UnpackError(e.output) from None + except FileNotFoundError: + logger.debug(f"Cannot find `{self.name}`, PATH={os.environ.get('PATH')}") + msg = ( + f"Unable to find executable `{self.name}`, " + "make sure it's installed in PATH." + ) + raise FileNotFoundError(msg) from None else: if log_output: logger.debug(result.stdout.rstrip()) Binary files old/unearth-0.9.1/tests/fixtures/files/first-2.0.2.tar.gz and new/unearth-0.12.0/tests/fixtures/files/first-2.0.2.tar.gz differ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/unearth-0.9.1/tests/test_evaluator.py new/unearth-0.12.0/tests/test_evaluator.py --- old/unearth-0.9.1/tests/test_evaluator.py 2023-05-15 09:34:42.543511400 +0200 +++ new/unearth-0.12.0/tests/test_evaluator.py 2023-10-20 09:20:50.560266700 +0200 @@ -7,6 +7,7 @@ Package, TargetPython, evaluate_package, + validate_hashes, ) from unearth.link import Link @@ -34,28 +35,23 @@ sess.close() -def test_no_binary_and_only_binary_conflict(): - with pytest.raises(ValueError): - FormatControl(no_binary=True, only_binary=True) - - @pytest.mark.parametrize("link", BINARY_LINKS) def test_only_binary_is_allowed(link): - format_control = FormatControl(only_binary=True, no_binary=False) + format_control = FormatControl(only_binary={"foo"}) format_control.check_format(link, "foo") - format_control = FormatControl(only_binary=False, no_binary=True) + format_control = FormatControl(no_binary={"foo"}) with pytest.raises(ValueError): format_control.check_format(link, "foo") 
@pytest.mark.parametrize("link", SOURCE_LINKS) def test_no_binary_is_allowed(link): - format_control = FormatControl(only_binary=True, no_binary=False) + format_control = FormatControl(only_binary={"foo"}) with pytest.raises(ValueError): format_control.check_format(link, "foo") - format_control = FormatControl(only_binary=False, no_binary=True) + format_control = FormatControl(no_binary={"foo"}) format_control.check_format(link, "foo") @@ -66,11 +62,11 @@ @pytest.mark.parametrize("allow_yanked", (True, False)) -def test_evaluate_yanked_link(allow_yanked, session): +def test_evaluate_yanked_link(allow_yanked): link = Link( "https://test.pypi.org/files/click-8.1.3-py3-none-any.whl", yank_reason="bad" ) - evaluator = Evaluator("click", session, allow_yanked=allow_yanked) + evaluator = Evaluator("click", allow_yanked=allow_yanked) if allow_yanked: assert evaluator.evaluate_link(link) is not None else: @@ -87,7 +83,7 @@ ) @pytest.mark.parametrize("ignore_compatibility", (True, False)) def test_evaluate_link_python_version( - python_version, requires_python, expected, ignore_compatibility, session + python_version, requires_python, expected, ignore_compatibility ): link = Link( "https://test.pypi.org/files/click-8.1.3-py3-none-any.whl", @@ -95,7 +91,6 @@ ) evaluator = Evaluator( "click", - session, target_python=TargetPython(python_version), ignore_compatibility=ignore_compatibility, ) @@ -111,12 +106,25 @@ "https://test.pypi.org/files/click-8.1.3_develop-py3-none-any.whl", ], ) -def test_evaluate_invalid_wheel_name(url, session): +def test_evaluate_invalid_wheel_name(url): link = Link(url) - evaluator = Evaluator("click", session) + evaluator = Evaluator("click") assert evaluator.evaluate_link(link) is None +@pytest.mark.parametrize("loose", (True, False)) +def test_evaluate_link_loose_filename(loose: bool, monkeypatch: pytest.MonkeyPatch): + if loose: + monkeypatch.setenv("UNEARTH_LOOSE_FILENAME", "1") + link = Link("https://test.pypi.org/files/foo-2-2.tar.gz") + 
evaluator = Evaluator("foo") + package = evaluator.evaluate_link(link) + if loose: + assert package.version == "2-2" + else: + assert package is None + + @pytest.mark.parametrize( "link,expected", [ @@ -125,8 +133,8 @@ ("https://test.pypi.org/files/Jinja2-3.1.2.zip", False), ], ) -def test_evaluate_against_name_match(link, expected, session): - evaluator = Evaluator("click", session) +def test_evaluate_against_name_match(link, expected): + evaluator = Evaluator("click") assert (evaluator.evaluate_link(Link(link)) is None) is not expected @@ -139,8 +147,8 @@ Link("git+...@github.com:pallets/click.git@main#egg=click"), ], ) -def test_evaluate_against_missing_version(link, session): - evaluator = Evaluator("click", session) +def test_evaluate_against_missing_version(link): + evaluator = Evaluator("click") assert evaluator.evaluate_link(link) is None @@ -164,12 +172,12 @@ ), ], ) -def test_evaluate_against_allowed_hashes(session, url, match): - evaluator = Evaluator( - "click", session, hashes={"sha256": ["1234567890abcdef", "fedcba0987654321"]} +def test_evaluate_against_allowed_hashes(url, match, session): + package = Package("click", "8.1.3", link=Link(url)) + result = validate_hashes( + package, {"sha256": ["1234567890abcdef", "fedcba0987654321"]}, session=session ) - - assert (evaluator.evaluate_link(Link(url)) is not None) is match + assert result is match @pytest.mark.parametrize( @@ -180,9 +188,9 @@ "https://test.pypi.org/files/click-8.1.3-py3-none-any.whl#md5=1111222", ], ) -def test_evaluate_allow_all_hashes(session, url): - evaluator = Evaluator("click", session) - assert evaluator.evaluate_link(Link(url)) is not None +def test_evaluate_allow_all_hashes(url, session): + package = Package("click", "8.1.3", link=Link(url)) + assert validate_hashes(package, {}, session) @pytest.mark.parametrize( @@ -193,23 +201,23 @@ ], ) def test_retrieve_hash_from_internet(pypi, session, url): - evaluator = Evaluator( - "click", - session, + link = Link(url) + package = 
Package("click", "8.1.3", link=link) + assert validate_hashes( + package, hashes={ "sha256": [ "bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48" ] }, + session=session, ) - link = Link(url) - assert evaluator.evaluate_link(link) is not None - assert link.hash_name == "sha256" - assert ( - link.hash == "bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48" - ) + hash_name, hash = next(iter(link.hashes.items())) + assert hash_name == "sha256" + assert hash == "bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48" +@pytest.mark.filterwarnings("ignore::FutureWarning") @pytest.mark.parametrize( "link,expected", [ @@ -226,13 +234,19 @@ ), False, ), + ( + Link( + "https://test.pypi.org/files/click-8.1.3-py3-none-any.whl", + requires_python=">3.6.*", + ), + True, + ), ], ) @pytest.mark.parametrize("ignore_compatibility", (True, False)) -def test_evaluate_compatibility_tags(link, expected, ignore_compatibility, session): +def test_evaluate_compatibility_tags(link, expected, ignore_compatibility): evaluator = Evaluator( "click", - session, target_python=TargetPython((3, 9), ["cp39"], "cp", ["win_amd64"]), ignore_compatibility=ignore_compatibility, ) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/unearth-0.9.1/tests/test_finder.py new/unearth-0.12.0/tests/test_finder.py --- old/unearth-0.9.1/tests/test_finder.py 2023-05-15 09:34:42.543511400 +0200 +++ new/unearth-0.12.0/tests/test_finder.py 2023-10-20 09:20:50.560266700 +0200 @@ -188,3 +188,42 @@ best = finder.find_best_match("first").best assert best.link.filename == "first-2.0.2.tar.gz" assert best.link.comes_from == "https://pypi.org/simple/first/" + + +def test_download_package_file(session, tmp_path): + finder = PackageFinder( + session=session, + index_urls=[DEFAULT_INDEX_URL], + ignore_compatibility=True, + ) + found = finder.find_best_match("first").best.link + assert found.filename == "first-2.0.2.tar.gz" + for subdir in 
("download", "unpack"): + (tmp_path / subdir).mkdir() + + download_reports = [] + unpack_reports = [] + + def download_reporter(link, completed, total): + download_reports.append((link, completed, total)) + + def unpack_reporter(filename, completed, total): + unpack_reports.append((filename, completed, total)) + + finder.download_and_unpack( + found, + tmp_path / "unpack", + download_dir=tmp_path / "download", + download_reporter=download_reporter, + unpack_reporter=unpack_reporter, + ) + downloaded = tmp_path / "download" / found.filename + assert downloaded.exists() + size = downloaded.stat().st_size + assert size > 0 + _, completed, total = download_reports[-1] + assert completed == total == size + + filename, completed, total = unpack_reports[-1] + assert completed == total + assert filename == downloaded