On Sun, May 31, 2020 at 2:01 PM Zac Medico <zmed...@gentoo.org> wrote: > > On 5/31/20 6:17 AM, Mike Gilbert wrote: > > Unquote the URL basename when fetching from upstream. > > Quote the filename when fetching from mirrors. > > > > Bug: https://bugs.gentoo.org/719810 > > Signed-off-by: Mike Gilbert <flop...@gentoo.org> > > --- > > lib/portage/dbapi/porttree.py | 7 ++++++- > > lib/portage/package/ebuild/fetch.py | 9 +++++++-- > > 2 files changed, 13 insertions(+), 3 deletions(-) > > > > diff --git a/lib/portage/dbapi/porttree.py b/lib/portage/dbapi/porttree.py > > index 08af17bcd..984263039 100644 > > --- a/lib/portage/dbapi/porttree.py > > +++ b/lib/portage/dbapi/porttree.py > > @@ -55,6 +55,11 @@ try: > > except ImportError: > > from urlparse import urlparse > > > > +try: > > + from urllib.parse import unquote as urlunquote > > +except ImportError: > > + from urllib import unquote as urlunquote > > + > > if sys.hexversion >= 0x3000000: > > # pylint: disable=W0622 > > basestring = str > > @@ -1547,7 +1552,7 @@ def _parse_uri_map(cpv, metadata, use=None): > > myuris.pop() > > distfile = myuris.pop() > > else: > > - distfile = os.path.basename(uri) > > + distfile = urlunquote(os.path.basename(uri)) > > if not distfile: > > raise portage.exception.InvalidDependString( > > ("getFetchMap(): '%s' SRC_URI has no > > file " + \ > > diff --git a/lib/portage/package/ebuild/fetch.py > > b/lib/portage/package/ebuild/fetch.py > > index 28e7caf53..47c3ad28f 100644 > > --- a/lib/portage/package/ebuild/fetch.py > > +++ b/lib/portage/package/ebuild/fetch.py > > @@ -26,6 +26,11 @@ try: > > except ImportError: > > from urlparse import urlparse > > > > +try: > > + from urllib.parse import quote as urlquote > > +except ImportError: > > + from urllib import quote as urlquote > > + > > import portage > > portage.proxy.lazyimport.lazyimport(globals(), > > 'portage.package.ebuild.config:check_config_instance,config', > > @@ -351,7 +356,7 @@ _size_suffix_map = { > > > > class FlatLayout(object): > > def get_path(self, filename): > > - return filename > > + return urlquote(filename) > > > > def get_filenames(self, distdir): > > for dirpath, dirnames, filenames in os.walk(distdir, > > @@ -382,7 +387,7 @@ class FilenameHashLayout(object): > > c = c // 4 > > ret += fnhash[:c] + '/' > > fnhash = fnhash[c:] > > - return ret + filename > > + return ret + urlquote(filename) > > > > def get_filenames(self, distdir): > > pattern = '' > > > > We've also got these other basename calls in fetch.py: > > > diff --git a/lib/portage/package/ebuild/fetch.py > > b/lib/portage/package/ebuild/fetch.py > > index 28e7caf53..56b375d58 100644 > > --- a/lib/portage/package/ebuild/fetch.py > > +++ b/lib/portage/package/ebuild/fetch.py > > @@ -730,9 +730,9 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0, > > else: > > for myuri in myuris: > > if urlparse(myuri).scheme: > > - > > file_uri_tuples.append((os.path.basename(myuri), myuri)) > > + > > file_uri_tuples.append((urlunquote(os.path.basename(myuri)), myuri)) > > else: > > - > > file_uri_tuples.append((os.path.basename(myuri), None)) > > + > > file_uri_tuples.append((urlunquote(os.path.basename(myuri)), None)) > > The case with no scheme is not a URI, so we need to decide whether > or not to unquote, and we should make _parse_uri_map behavior > consistent for this case.
Backing up, I think unquoting the basename is really a separate issue from quoting the filename in Layout.get_path(). The latter fixes a known bug when fetching from mirrors, whereas the former seems like a cosmetic issue. I don't think we should mix these two up into the same commit. Could I get your approval to push my original patch (Escape percent-signs in filename when fetching from mirrors)?