commit: 82e0b23ec7814a757e9325db279e8b0f131a125a Author: Sam James <sam <AT> gentoo <DOT> org> AuthorDate: Thu Sep 11 01:44:46 2025 +0000 Commit: Sam James <sam <AT> gentoo <DOT> org> CommitDate: Thu Sep 11 03:16:43 2025 +0000 URL: https://gitweb.gentoo.org/proj/portage.git/commit/?id=82e0b23e
Revert "bintree: Accelerate index fetch by requesting Packages.gz first" This reverts commit fd55382a4935097c547800c7cf93a20151575fbd. After reverting 8b96ca57e15bbc989f39ec4973fc3c9492fcc059 to fix a CI hang, we see a test failure. Reverting pending investigation. It can of course be reapplied once we've figured out what's going on and fixed as necessary. Bug: https://bugs.gentoo.org/958635 Bug: https://bugs.gentoo.org/962721 Signed-off-by: Sam James <sam <AT> gentoo.org> Part-of: https://github.com/gentoo/portage/pull/1457 Closes: https://github.com/gentoo/portage/pull/1457 Signed-off-by: Sam James <sam <AT> gentoo.org> lib/portage/dbapi/bintree.py | 380 +++++++++++++++++++++---------------------- 1 file changed, 182 insertions(+), 198 deletions(-) diff --git a/lib/portage/dbapi/bintree.py b/lib/portage/dbapi/bintree.py index ae7d08ffc4..f6bc0d7490 100644 --- a/lib/portage/dbapi/bintree.py +++ b/lib/portage/dbapi/bintree.py @@ -1412,227 +1412,211 @@ class binarytree: rmt_idx = self._new_pkgindex() proc = None tmp_filename = None - for remote_pkgindex_file in ("Packages.gz", "Packages"): - try: - # urlparse.urljoin() only works correctly with recognized - # protocols and requires the base url to have a trailing - # slash, so join manually... - url = base_url.rstrip("/") + "/" + remote_pkgindex_file - f = None + try: + # urlparse.urljoin() only works correctly with recognized + # protocols and requires the base url to have a trailing + # slash, so join manually... 
+ url = base_url.rstrip("/") + "/Packages" + f = None + + if local_timestamp and (repo.frozen or not getbinpkg_refresh): + raise UseCachedCopyOfRemoteIndex() - if local_timestamp and (repo.frozen or not getbinpkg_refresh): + try: + ttl = float(pkgindex.header.get("TTL", 0)) + except ValueError: + pass + else: + if ( + download_timestamp + and ttl + and download_timestamp + ttl > time.time() + ): raise UseCachedCopyOfRemoteIndex() + # Set proxy settings for _urlopen -> urllib_request + proxies = {} + for proto in ("http", "https"): + value = self.settings.get(proto + "_proxy") + if value is not None: + proxies[proto] = value + + # Don't use urlopen for https, unless + # PEP 476 is supported (bug #469888). + if ( + repo.fetchcommand is None or parsed_url.scheme in ("", "file") + ) and (parsed_url.scheme not in ("https",) or _have_pep_476()): try: - ttl = float(pkgindex.header.get("TTL", 0)) - except ValueError: - pass - else: + if parsed_url.scheme in ("", "file"): + f = open(f"{parsed_url.path.rstrip('/')}/Packages", "rb") + else: + f = _urlopen( + url, if_modified_since=local_timestamp, proxies=proxies + ) + if hasattr(f, "headers") and f.headers.get("timestamp", ""): + remote_timestamp = f.headers.get("timestamp") + except OSError as err: if ( - download_timestamp - and ttl - and download_timestamp + ttl > time.time() - ): + hasattr(err, "code") and err.code == 304 + ): # not modified (since local_timestamp) raise UseCachedCopyOfRemoteIndex() - # Set proxy settings for _urlopen -> urllib_request - proxies = {} - for proto in ("http", "https"): - value = self.settings.get(proto + "_proxy") - if value is not None: - proxies[proto] = value + if parsed_url.scheme in ("ftp", "http", "https"): + # This protocol is supposedly supported by urlopen, + # so apparently there's a problem with the url + # or a bug in urlopen. 
+ if self.settings.get("PORTAGE_DEBUG", "0") != "0": + traceback.print_exc() - # Don't use urlopen for https, unless - # PEP 476 is supported (bug #469888). - if ( - repo.fetchcommand is None or parsed_url.scheme in ("", "file") - ) and (parsed_url.scheme not in ("https",) or _have_pep_476()): - try: - if parsed_url.scheme in ("", "file"): - f = open( - f"{parsed_url.path.rstrip('/')}/Packages", "rb" - ) - else: - f = _urlopen( - url, - if_modified_since=local_timestamp, - proxies=proxies, - ) - if hasattr(f, "headers") and f.headers.get( - "timestamp", "" - ): - remote_timestamp = f.headers.get("timestamp") - except OSError as err: - if ( - hasattr(err, "code") and err.code == 304 - ): # not modified (since local_timestamp) - raise UseCachedCopyOfRemoteIndex() - - if parsed_url.scheme in ("ftp", "http", "https"): - # This protocol is supposedly supported by urlopen, - # so apparently there's a problem with the url - # or a bug in urlopen. - if self.settings.get("PORTAGE_DEBUG", "0") != "0": - traceback.print_exc() - - raise - except ValueError: - raise ParseError( - f"Invalid Portage BINHOST value '{url.lstrip()}'" - ) + raise + except ValueError: + raise ParseError( + f"Invalid Portage BINHOST value '{url.lstrip()}'" + ) - if f is None: - path = parsed_url.path.rstrip("/") + "/Packages" - - if repo.fetchcommand is None and parsed_url.scheme == "ssh": - # Use a pipe so that we can terminate the download - # early if we detect that the TIMESTAMP header - # matches that of the cached Packages file. 
- ssh_args = ["ssh"] - if port is not None: - ssh_args.append(f"-p{port}") - # NOTE: shlex evaluates embedded quotes - ssh_args.extend( - shlex.split(self.settings.get("PORTAGE_SSH_OPTS", "")) - ) - ssh_args.append(user_passwd + host) - ssh_args.append("--") - ssh_args.append("cat") - ssh_args.append(path) + if f is None: + path = parsed_url.path.rstrip("/") + "/Packages" + + if repo.fetchcommand is None and parsed_url.scheme == "ssh": + # Use a pipe so that we can terminate the download + # early if we detect that the TIMESTAMP header + # matches that of the cached Packages file. + ssh_args = ["ssh"] + if port is not None: + ssh_args.append(f"-p{port}") + # NOTE: shlex evaluates embedded quotes + ssh_args.extend( + shlex.split(self.settings.get("PORTAGE_SSH_OPTS", "")) + ) + ssh_args.append(user_passwd + host) + ssh_args.append("--") + ssh_args.append("cat") + ssh_args.append(path) - proc = subprocess.Popen(ssh_args, stdout=subprocess.PIPE) - f = proc.stdout - else: - if repo.fetchcommand is None: - setting = "FETCHCOMMAND_" + parsed_url.scheme.upper() - fcmd = self.settings.get(setting) + proc = subprocess.Popen(ssh_args, stdout=subprocess.PIPE) + f = proc.stdout + else: + if repo.fetchcommand is None: + setting = "FETCHCOMMAND_" + parsed_url.scheme.upper() + fcmd = self.settings.get(setting) + if not fcmd: + fcmd = self.settings.get("FETCHCOMMAND") if not fcmd: - fcmd = self.settings.get("FETCHCOMMAND") - if not fcmd: - raise OSError("FETCHCOMMAND is unset") - else: - fcmd = repo.fetchcommand + raise OSError("FETCHCOMMAND is unset") + else: + fcmd = repo.fetchcommand - fd, tmp_filename = tempfile.mkstemp() - tmp_dirname, tmp_basename = os.path.split(tmp_filename) - os.close(fd) + fd, tmp_filename = tempfile.mkstemp() + tmp_dirname, tmp_basename = os.path.split(tmp_filename) + os.close(fd) - fcmd_vars = { - "DISTDIR": tmp_dirname, - "FILE": tmp_basename, - "URI": url, - } + fcmd_vars = { + "DISTDIR": tmp_dirname, + "FILE": tmp_basename, + "URI": url, + } - for k 
in ("PORTAGE_SSH_OPTS",): - v = self.settings.get(k) - if v is not None: - fcmd_vars[k] = v + for k in ("PORTAGE_SSH_OPTS",): + v = self.settings.get(k) + if v is not None: + fcmd_vars[k] = v - success = portage.getbinpkg.file_get( - fcmd=fcmd, fcmd_vars=fcmd_vars - ) - if not success: - raise OSError(f"{setting} failed") - f = open(tmp_filename, "rb") - - if remote_pkgindex_file == "Packages.gz": - f = GzipFile(fileobj=f, mode="rb") + success = portage.getbinpkg.file_get( + fcmd=fcmd, fcmd_vars=fcmd_vars + ) + if not success: + raise OSError(f"{setting} failed") + f = open(tmp_filename, "rb") - f_dec = codecs.iterdecode( - f, _encodings["repo.content"], errors="replace" - ) - try: - rmt_idx.readHeader(f_dec) - if ( - not remote_timestamp - ): # in case it had not been read from HTTP header - remote_timestamp = rmt_idx.header.get("TIMESTAMP", None) - if not remote_timestamp: - # no timestamp in the header, something's wrong - pkgindex = None + f_dec = codecs.iterdecode( + f, _encodings["repo.content"], errors="replace" + ) + try: + rmt_idx.readHeader(f_dec) + if ( + not remote_timestamp + ): # in case it had not been read from HTTP header + remote_timestamp = rmt_idx.header.get("TIMESTAMP", None) + if not remote_timestamp: + # no timestamp in the header, something's wrong + pkgindex = None + writemsg( + _( + "\n\n!!! Binhost package index " + " has no TIMESTAMP field.\n" + ), + noiselevel=-1, + ) + else: + if not self._pkgindex_version_supported(rmt_idx): writemsg( _( - "\n\n!!! Binhost package index " - " has no TIMESTAMP field.\n" - ), - noiselevel=-1, - ) - else: - if not self._pkgindex_version_supported(rmt_idx): - writemsg( - _( - "\n\n!!! Binhost package index version" - " is not supported: '%s'\n" - ) - % rmt_idx.header.get("VERSION"), - noiselevel=-1, + "\n\n!!! 
Binhost package index version" + " is not supported: '%s'\n" ) - pkgindex = None - elif not local_timestamp or int(local_timestamp) < int( - remote_timestamp - ): - rmt_idx.readBody(f_dec) - pkgindex = rmt_idx - finally: - # Timeout after 5 seconds, in case close() blocks - # indefinitely (see bug #350139). - try: - try: - AlarmSignal.register(5) - f.close() - finally: - AlarmSignal.unregister() - except AlarmSignal: - writemsg( - "\n\n!!! %s\n" - % _("Timed out while closing connection to binhost"), + % rmt_idx.header.get("VERSION"), noiselevel=-1, ) - break - except UseCachedCopyOfRemoteIndex: - changed = False - desc = "frozen" if repo.frozen else "up-to-date" - writemsg_stdout("\n") - writemsg_stdout( - colorize( - "GOOD", - _("Local copy of remote index is %s and will be used.") - % desc, - ) - + "\n" - ) - rmt_idx = pkgindex - except OSError as e: - # This includes URLError which is raised for SSL - # certificate errors when PEP 476 is supported. - writemsg( - _("\n\n!!! Error fetching binhost package" " info from '%s'\n") - % _hide_url_passwd(base_url) - ) - # With Python 2, the EnvironmentError message may - # contain bytes or unicode, so use str to ensure - # safety with all locales (bug #532784). - try: - error_msg = str(e) - except UnicodeDecodeError as uerror: - error_msg = str( - uerror.object, encoding="utf_8", errors="replace" - ) - writemsg(f"!!! {error_msg}\n\n") - del e - pkgindex = None + pkgindex = None + elif not local_timestamp or int(local_timestamp) < int( + remote_timestamp + ): + rmt_idx.readBody(f_dec) + pkgindex = rmt_idx finally: - if proc is not None: - if proc.poll() is None: - proc.kill() - proc.wait() - proc = None - if tmp_filename is not None: + # Timeout after 5 seconds, in case close() blocks + # indefinitely (see bug #350139). + try: try: - os.unlink(tmp_filename) - except OSError: - pass - + AlarmSignal.register(5) + f.close() + finally: + AlarmSignal.unregister() + except AlarmSignal: + writemsg( + "\n\n!!! 
%s\n" + % _("Timed out while closing connection to binhost"), + noiselevel=-1, + ) + except UseCachedCopyOfRemoteIndex: + changed = False + desc = "frozen" if repo.frozen else "up-to-date" + writemsg_stdout("\n") + writemsg_stdout( + colorize( + "GOOD", + _("Local copy of remote index is %s and will be used.") % desc, + ) + + "\n" + ) + rmt_idx = pkgindex + except OSError as e: + # This includes URLError which is raised for SSL + # certificate errors when PEP 476 is supported. + writemsg( + _("\n\n!!! Error fetching binhost package" " info from '%s'\n") + % _hide_url_passwd(base_url) + ) + # With Python 2, the EnvironmentError message may + # contain bytes or unicode, so use str to ensure + # safety with all locales (bug #532784). + try: + error_msg = str(e) + except UnicodeDecodeError as uerror: + error_msg = str(uerror.object, encoding="utf_8", errors="replace") + writemsg(f"!!! {error_msg}\n\n") + del e + pkgindex = None + if proc is not None: + if proc.poll() is None: + proc.kill() + proc.wait() + proc = None + if tmp_filename is not None: + try: + os.unlink(tmp_filename) + except OSError: + pass if pkgindex is rmt_idx and changed: pkgindex.modified = False # don't update the header pkgindex.header["DOWNLOAD_TIMESTAMP"] = "%d" % time.time()
