commit:     82e0b23ec7814a757e9325db279e8b0f131a125a
Author:     Sam James <sam <AT> gentoo <DOT> org>
AuthorDate: Thu Sep 11 01:44:46 2025 +0000
Commit:     Sam James <sam <AT> gentoo <DOT> org>
CommitDate: Thu Sep 11 03:16:43 2025 +0000
URL:        https://gitweb.gentoo.org/proj/portage.git/commit/?id=82e0b23e

Revert "bintree: Accelerate index fetch by requesting Packages.gz first"

This reverts commit fd55382a4935097c547800c7cf93a20151575fbd.

After reverting 8b96ca57e15bbc989f39ec4973fc3c9492fcc059 to fix a CI hang,
we see a test failure.

Reverting pending investigation. It can of course be reapplied once we've
figured out what's going on and fixed as necessary.

Bug: https://bugs.gentoo.org/958635
Bug: https://bugs.gentoo.org/962721
Signed-off-by: Sam James <sam <AT> gentoo.org>
Part-of: https://github.com/gentoo/portage/pull/1457
Closes: https://github.com/gentoo/portage/pull/1457
Signed-off-by: Sam James <sam <AT> gentoo.org>

 lib/portage/dbapi/bintree.py | 380 +++++++++++++++++++++----------------------
 1 file changed, 182 insertions(+), 198 deletions(-)

diff --git a/lib/portage/dbapi/bintree.py b/lib/portage/dbapi/bintree.py
index ae7d08ffc4..f6bc0d7490 100644
--- a/lib/portage/dbapi/bintree.py
+++ b/lib/portage/dbapi/bintree.py
@@ -1412,227 +1412,211 @@ class binarytree:
             rmt_idx = self._new_pkgindex()
             proc = None
             tmp_filename = None
-            for remote_pkgindex_file in ("Packages.gz", "Packages"):
-                try:
-                    # urlparse.urljoin() only works correctly with recognized
-                    # protocols and requires the base url to have a trailing
-                    # slash, so join manually...
-                    url = base_url.rstrip("/") + "/" + remote_pkgindex_file
-                    f = None
+            try:
+                # urlparse.urljoin() only works correctly with recognized
+                # protocols and requires the base url to have a trailing
+                # slash, so join manually...
+                url = base_url.rstrip("/") + "/Packages"
+                f = None
+
+                if local_timestamp and (repo.frozen or not getbinpkg_refresh):
+                    raise UseCachedCopyOfRemoteIndex()
 
-                    if local_timestamp and (repo.frozen or not 
getbinpkg_refresh):
+                try:
+                    ttl = float(pkgindex.header.get("TTL", 0))
+                except ValueError:
+                    pass
+                else:
+                    if (
+                        download_timestamp
+                        and ttl
+                        and download_timestamp + ttl > time.time()
+                    ):
                         raise UseCachedCopyOfRemoteIndex()
 
+                # Set proxy settings for _urlopen -> urllib_request
+                proxies = {}
+                for proto in ("http", "https"):
+                    value = self.settings.get(proto + "_proxy")
+                    if value is not None:
+                        proxies[proto] = value
+
+                # Don't use urlopen for https, unless
+                # PEP 476 is supported (bug #469888).
+                if (
+                    repo.fetchcommand is None or parsed_url.scheme in ("", 
"file")
+                ) and (parsed_url.scheme not in ("https",) or _have_pep_476()):
                     try:
-                        ttl = float(pkgindex.header.get("TTL", 0))
-                    except ValueError:
-                        pass
-                    else:
+                        if parsed_url.scheme in ("", "file"):
+                            f = 
open(f"{parsed_url.path.rstrip('/')}/Packages", "rb")
+                        else:
+                            f = _urlopen(
+                                url, if_modified_since=local_timestamp, 
proxies=proxies
+                            )
+                            if hasattr(f, "headers") and 
f.headers.get("timestamp", ""):
+                                remote_timestamp = f.headers.get("timestamp")
+                    except OSError as err:
                         if (
-                            download_timestamp
-                            and ttl
-                            and download_timestamp + ttl > time.time()
-                        ):
+                            hasattr(err, "code") and err.code == 304
+                        ):  # not modified (since local_timestamp)
                             raise UseCachedCopyOfRemoteIndex()
 
-                    # Set proxy settings for _urlopen -> urllib_request
-                    proxies = {}
-                    for proto in ("http", "https"):
-                        value = self.settings.get(proto + "_proxy")
-                        if value is not None:
-                            proxies[proto] = value
+                        if parsed_url.scheme in ("ftp", "http", "https"):
+                            # This protocol is supposedly supported by urlopen,
+                            # so apparently there's a problem with the url
+                            # or a bug in urlopen.
+                            if self.settings.get("PORTAGE_DEBUG", "0") != "0":
+                                traceback.print_exc()
 
-                    # Don't use urlopen for https, unless
-                    # PEP 476 is supported (bug #469888).
-                    if (
-                        repo.fetchcommand is None or parsed_url.scheme in ("", 
"file")
-                    ) and (parsed_url.scheme not in ("https",) or 
_have_pep_476()):
-                        try:
-                            if parsed_url.scheme in ("", "file"):
-                                f = open(
-                                    f"{parsed_url.path.rstrip('/')}/Packages", 
"rb"
-                                )
-                            else:
-                                f = _urlopen(
-                                    url,
-                                    if_modified_since=local_timestamp,
-                                    proxies=proxies,
-                                )
-                                if hasattr(f, "headers") and f.headers.get(
-                                    "timestamp", ""
-                                ):
-                                    remote_timestamp = 
f.headers.get("timestamp")
-                        except OSError as err:
-                            if (
-                                hasattr(err, "code") and err.code == 304
-                            ):  # not modified (since local_timestamp)
-                                raise UseCachedCopyOfRemoteIndex()
-
-                            if parsed_url.scheme in ("ftp", "http", "https"):
-                                # This protocol is supposedly supported by 
urlopen,
-                                # so apparently there's a problem with the url
-                                # or a bug in urlopen.
-                                if self.settings.get("PORTAGE_DEBUG", "0") != 
"0":
-                                    traceback.print_exc()
-
-                                raise
-                        except ValueError:
-                            raise ParseError(
-                                f"Invalid Portage BINHOST value 
'{url.lstrip()}'"
-                            )
+                            raise
+                    except ValueError:
+                        raise ParseError(
+                            f"Invalid Portage BINHOST value '{url.lstrip()}'"
+                        )
 
-                    if f is None:
-                        path = parsed_url.path.rstrip("/") + "/Packages"
-
-                        if repo.fetchcommand is None and parsed_url.scheme == 
"ssh":
-                            # Use a pipe so that we can terminate the download
-                            # early if we detect that the TIMESTAMP header
-                            # matches that of the cached Packages file.
-                            ssh_args = ["ssh"]
-                            if port is not None:
-                                ssh_args.append(f"-p{port}")
-                            # NOTE: shlex evaluates embedded quotes
-                            ssh_args.extend(
-                                
shlex.split(self.settings.get("PORTAGE_SSH_OPTS", ""))
-                            )
-                            ssh_args.append(user_passwd + host)
-                            ssh_args.append("--")
-                            ssh_args.append("cat")
-                            ssh_args.append(path)
+                if f is None:
+                    path = parsed_url.path.rstrip("/") + "/Packages"
+
+                    if repo.fetchcommand is None and parsed_url.scheme == 
"ssh":
+                        # Use a pipe so that we can terminate the download
+                        # early if we detect that the TIMESTAMP header
+                        # matches that of the cached Packages file.
+                        ssh_args = ["ssh"]
+                        if port is not None:
+                            ssh_args.append(f"-p{port}")
+                        # NOTE: shlex evaluates embedded quotes
+                        ssh_args.extend(
+                            shlex.split(self.settings.get("PORTAGE_SSH_OPTS", 
""))
+                        )
+                        ssh_args.append(user_passwd + host)
+                        ssh_args.append("--")
+                        ssh_args.append("cat")
+                        ssh_args.append(path)
 
-                            proc = subprocess.Popen(ssh_args, 
stdout=subprocess.PIPE)
-                            f = proc.stdout
-                        else:
-                            if repo.fetchcommand is None:
-                                setting = "FETCHCOMMAND_" + 
parsed_url.scheme.upper()
-                                fcmd = self.settings.get(setting)
+                        proc = subprocess.Popen(ssh_args, 
stdout=subprocess.PIPE)
+                        f = proc.stdout
+                    else:
+                        if repo.fetchcommand is None:
+                            setting = "FETCHCOMMAND_" + 
parsed_url.scheme.upper()
+                            fcmd = self.settings.get(setting)
+                            if not fcmd:
+                                fcmd = self.settings.get("FETCHCOMMAND")
                                 if not fcmd:
-                                    fcmd = self.settings.get("FETCHCOMMAND")
-                                    if not fcmd:
-                                        raise OSError("FETCHCOMMAND is unset")
-                            else:
-                                fcmd = repo.fetchcommand
+                                    raise OSError("FETCHCOMMAND is unset")
+                        else:
+                            fcmd = repo.fetchcommand
 
-                            fd, tmp_filename = tempfile.mkstemp()
-                            tmp_dirname, tmp_basename = 
os.path.split(tmp_filename)
-                            os.close(fd)
+                        fd, tmp_filename = tempfile.mkstemp()
+                        tmp_dirname, tmp_basename = os.path.split(tmp_filename)
+                        os.close(fd)
 
-                            fcmd_vars = {
-                                "DISTDIR": tmp_dirname,
-                                "FILE": tmp_basename,
-                                "URI": url,
-                            }
+                        fcmd_vars = {
+                            "DISTDIR": tmp_dirname,
+                            "FILE": tmp_basename,
+                            "URI": url,
+                        }
 
-                            for k in ("PORTAGE_SSH_OPTS",):
-                                v = self.settings.get(k)
-                                if v is not None:
-                                    fcmd_vars[k] = v
+                        for k in ("PORTAGE_SSH_OPTS",):
+                            v = self.settings.get(k)
+                            if v is not None:
+                                fcmd_vars[k] = v
 
-                            success = portage.getbinpkg.file_get(
-                                fcmd=fcmd, fcmd_vars=fcmd_vars
-                            )
-                            if not success:
-                                raise OSError(f"{setting} failed")
-                            f = open(tmp_filename, "rb")
-
-                    if remote_pkgindex_file == "Packages.gz":
-                        f = GzipFile(fileobj=f, mode="rb")
+                        success = portage.getbinpkg.file_get(
+                            fcmd=fcmd, fcmd_vars=fcmd_vars
+                        )
+                        if not success:
+                            raise OSError(f"{setting} failed")
+                        f = open(tmp_filename, "rb")
 
-                    f_dec = codecs.iterdecode(
-                        f, _encodings["repo.content"], errors="replace"
-                    )
-                    try:
-                        rmt_idx.readHeader(f_dec)
-                        if (
-                            not remote_timestamp
-                        ):  # in case it had not been read from HTTP header
-                            remote_timestamp = rmt_idx.header.get("TIMESTAMP", 
None)
-                        if not remote_timestamp:
-                            # no timestamp in the header, something's wrong
-                            pkgindex = None
+                f_dec = codecs.iterdecode(
+                    f, _encodings["repo.content"], errors="replace"
+                )
+                try:
+                    rmt_idx.readHeader(f_dec)
+                    if (
+                        not remote_timestamp
+                    ):  # in case it had not been read from HTTP header
+                        remote_timestamp = rmt_idx.header.get("TIMESTAMP", 
None)
+                    if not remote_timestamp:
+                        # no timestamp in the header, something's wrong
+                        pkgindex = None
+                        writemsg(
+                            _(
+                                "\n\n!!! Binhost package index "
+                                " has no TIMESTAMP field.\n"
+                            ),
+                            noiselevel=-1,
+                        )
+                    else:
+                        if not self._pkgindex_version_supported(rmt_idx):
                             writemsg(
                                 _(
-                                    "\n\n!!! Binhost package index "
-                                    " has no TIMESTAMP field.\n"
-                                ),
-                                noiselevel=-1,
-                            )
-                        else:
-                            if not self._pkgindex_version_supported(rmt_idx):
-                                writemsg(
-                                    _(
-                                        "\n\n!!! Binhost package index version"
-                                        " is not supported: '%s'\n"
-                                    )
-                                    % rmt_idx.header.get("VERSION"),
-                                    noiselevel=-1,
+                                    "\n\n!!! Binhost package index version"
+                                    " is not supported: '%s'\n"
                                 )
-                                pkgindex = None
-                            elif not local_timestamp or int(local_timestamp) < 
int(
-                                remote_timestamp
-                            ):
-                                rmt_idx.readBody(f_dec)
-                                pkgindex = rmt_idx
-                    finally:
-                        # Timeout after 5 seconds, in case close() blocks
-                        # indefinitely (see bug #350139).
-                        try:
-                            try:
-                                AlarmSignal.register(5)
-                                f.close()
-                            finally:
-                                AlarmSignal.unregister()
-                        except AlarmSignal:
-                            writemsg(
-                                "\n\n!!! %s\n"
-                                % _("Timed out while closing connection to 
binhost"),
+                                % rmt_idx.header.get("VERSION"),
                                 noiselevel=-1,
                             )
-                    break
-                except UseCachedCopyOfRemoteIndex:
-                    changed = False
-                    desc = "frozen" if repo.frozen else "up-to-date"
-                    writemsg_stdout("\n")
-                    writemsg_stdout(
-                        colorize(
-                            "GOOD",
-                            _("Local copy of remote index is %s and will be 
used.")
-                            % desc,
-                        )
-                        + "\n"
-                    )
-                    rmt_idx = pkgindex
-                except OSError as e:
-                    # This includes URLError which is raised for SSL
-                    # certificate errors when PEP 476 is supported.
-                    writemsg(
-                        _("\n\n!!! Error fetching binhost package" " info from 
'%s'\n")
-                        % _hide_url_passwd(base_url)
-                    )
-                    # With Python 2, the EnvironmentError message may
-                    # contain bytes or unicode, so use str to ensure
-                    # safety with all locales (bug #532784).
-                    try:
-                        error_msg = str(e)
-                    except UnicodeDecodeError as uerror:
-                        error_msg = str(
-                            uerror.object, encoding="utf_8", errors="replace"
-                        )
-                    writemsg(f"!!! {error_msg}\n\n")
-                    del e
-                    pkgindex = None
+                            pkgindex = None
+                        elif not local_timestamp or int(local_timestamp) < int(
+                            remote_timestamp
+                        ):
+                            rmt_idx.readBody(f_dec)
+                            pkgindex = rmt_idx
                 finally:
-                    if proc is not None:
-                        if proc.poll() is None:
-                            proc.kill()
-                            proc.wait()
-                        proc = None
-                    if tmp_filename is not None:
+                    # Timeout after 5 seconds, in case close() blocks
+                    # indefinitely (see bug #350139).
+                    try:
                         try:
-                            os.unlink(tmp_filename)
-                        except OSError:
-                            pass
-
+                            AlarmSignal.register(5)
+                            f.close()
+                        finally:
+                            AlarmSignal.unregister()
+                    except AlarmSignal:
+                        writemsg(
+                            "\n\n!!! %s\n"
+                            % _("Timed out while closing connection to 
binhost"),
+                            noiselevel=-1,
+                        )
+            except UseCachedCopyOfRemoteIndex:
+                changed = False
+                desc = "frozen" if repo.frozen else "up-to-date"
+                writemsg_stdout("\n")
+                writemsg_stdout(
+                    colorize(
+                        "GOOD",
+                        _("Local copy of remote index is %s and will be 
used.") % desc,
+                    )
+                    + "\n"
+                )
+                rmt_idx = pkgindex
+            except OSError as e:
+                # This includes URLError which is raised for SSL
+                # certificate errors when PEP 476 is supported.
+                writemsg(
+                    _("\n\n!!! Error fetching binhost package" " info from 
'%s'\n")
+                    % _hide_url_passwd(base_url)
+                )
+                # With Python 2, the EnvironmentError message may
+                # contain bytes or unicode, so use str to ensure
+                # safety with all locales (bug #532784).
+                try:
+                    error_msg = str(e)
+                except UnicodeDecodeError as uerror:
+                    error_msg = str(uerror.object, encoding="utf_8", 
errors="replace")
+                writemsg(f"!!! {error_msg}\n\n")
+                del e
+                pkgindex = None
+            if proc is not None:
+                if proc.poll() is None:
+                    proc.kill()
+                    proc.wait()
+                proc = None
+            if tmp_filename is not None:
+                try:
+                    os.unlink(tmp_filename)
+                except OSError:
+                    pass
             if pkgindex is rmt_idx and changed:
                 pkgindex.modified = False  # don't update the header
                 pkgindex.header["DOWNLOAD_TIMESTAMP"] = "%d" % time.time()

Reply via email to