Re: [gentoo-portage-dev] [PATCH] Improve handling of percent-signs in SRC_URI

2020-05-31 Thread Zac Medico
On 5/31/20 1:53 PM, Mike Gilbert wrote:
> On Sun, May 31, 2020 at 2:01 PM Zac Medico  wrote:
>>
>> On 5/31/20 6:17 AM, Mike Gilbert wrote:
>>> Unquote the URL basename when fetching from upstream.
>>> Quote the filename when fetching from mirrors.
>>>
>>> Bug: https://bugs.gentoo.org/719810
>>> Signed-off-by: Mike Gilbert 
>>> ---
>>>  lib/portage/dbapi/porttree.py   | 7 ++-
>>>  lib/portage/package/ebuild/fetch.py | 9 +++--
>>>  2 files changed, 13 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/lib/portage/dbapi/porttree.py b/lib/portage/dbapi/porttree.py
>>> index 08af17bcd..984263039 100644
>>> --- a/lib/portage/dbapi/porttree.py
>>> +++ b/lib/portage/dbapi/porttree.py
>>> @@ -55,6 +55,11 @@ try:
>>>  except ImportError:
>>>   from urlparse import urlparse
>>>
>>> +try:
>>> + from urllib.parse import unquote as urlunquote
>>> +except ImportError:
>>> + from urllib import unquote as urlunquote
>>> +
>>>  if sys.hexversion >= 0x300:
>>>   # pylint: disable=W0622
>>>   basestring = str
>>> @@ -1547,7 +1552,7 @@ def _parse_uri_map(cpv, metadata, use=None):
>>>   myuris.pop()
>>>   distfile = myuris.pop()
>>>   else:
>>> - distfile = os.path.basename(uri)
>>> + distfile = urlunquote(os.path.basename(uri))
>>>   if not distfile:
>>>   raise portage.exception.InvalidDependString(
>>>   ("getFetchMap(): '%s' SRC_URI has no 
>>> file " + \
>>> diff --git a/lib/portage/package/ebuild/fetch.py 
>>> b/lib/portage/package/ebuild/fetch.py
>>> index 28e7caf53..47c3ad28f 100644
>>> --- a/lib/portage/package/ebuild/fetch.py
>>> +++ b/lib/portage/package/ebuild/fetch.py
>>> @@ -26,6 +26,11 @@ try:
>>>  except ImportError:
>>>   from urlparse import urlparse
>>>
>>> +try:
>>> + from urllib.parse import quote as urlquote
>>> +except ImportError:
>>> + from urllib import quote as urlquote
>>> +
>>>  import portage
>>>  portage.proxy.lazyimport.lazyimport(globals(),
>>>   'portage.package.ebuild.config:check_config_instance,config',
>>> @@ -351,7 +356,7 @@ _size_suffix_map = {
>>>
>>>  class FlatLayout(object):
>>>   def get_path(self, filename):
>>> - return filename
>>> + return urlquote(filename)
>>>
>>>   def get_filenames(self, distdir):
>>>   for dirpath, dirnames, filenames in os.walk(distdir,
>>> @@ -382,7 +387,7 @@ class FilenameHashLayout(object):
>>>   c = c // 4
>>>   ret += fnhash[:c] + '/'
>>>   fnhash = fnhash[c:]
>>> - return ret + filename
>>> + return ret + urlquote(filename)
>>>
>>>   def get_filenames(self, distdir):
>>>   pattern = ''
>>>
>>
>> We've also got these other basename calls in fetch.py:
>>
>>> diff --git a/lib/portage/package/ebuild/fetch.py 
>>> b/lib/portage/package/ebuild/fetch.py
>>> index 28e7caf53..56b375d58 100644
>>> --- a/lib/portage/package/ebuild/fetch.py
>>> +++ b/lib/portage/package/ebuild/fetch.py
>>> @@ -730,9 +730,9 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
>>> else:
>>> for myuri in myuris:
>>> if urlparse(myuri).scheme:
>>> -   
>>> file_uri_tuples.append((os.path.basename(myuri), myuri))
>>> +   
>>> file_uri_tuples.append((urlunquote(os.path.basename(myuri)), myuri))
>>> else:
>>> -   
>>> file_uri_tuples.append((os.path.basename(myuri), None))
>>> +   
>>> file_uri_tuples.append((urlunquote(os.path.basename(myuri)), None))
> 
> I'm not sure how to reach this particular code path. In my testing,
> the fetch() function gets passed an OrderedDict in myuris, and the
> filenames have already been unquoted, so we don't want to do it again.
> 
> Any idea how this "else" block would ever be executed?

This code is only for backward compatibility code, so we should probably
add a deprecation warning and forget about quoting/unquoting.
-- 
Thanks,
Zac



signature.asc
Description: OpenPGP digital signature


Re: [gentoo-portage-dev] [PATCH] Improve handling of percent-signs in SRC_URI

2020-05-31 Thread Mike Gilbert
On Sun, May 31, 2020 at 2:01 PM Zac Medico  wrote:
>
> On 5/31/20 6:17 AM, Mike Gilbert wrote:
> > Unquote the URL basename when fetching from upstream.
> > Quote the filename when fetching from mirrors.
> >
> > Bug: https://bugs.gentoo.org/719810
> > Signed-off-by: Mike Gilbert 
> > ---
> >  lib/portage/dbapi/porttree.py   | 7 ++-
> >  lib/portage/package/ebuild/fetch.py | 9 +++--
> >  2 files changed, 13 insertions(+), 3 deletions(-)
> >
> > diff --git a/lib/portage/dbapi/porttree.py b/lib/portage/dbapi/porttree.py
> > index 08af17bcd..984263039 100644
> > --- a/lib/portage/dbapi/porttree.py
> > +++ b/lib/portage/dbapi/porttree.py
> > @@ -55,6 +55,11 @@ try:
> >  except ImportError:
> >   from urlparse import urlparse
> >
> > +try:
> > + from urllib.parse import unquote as urlunquote
> > +except ImportError:
> > + from urllib import unquote as urlunquote
> > +
> >  if sys.hexversion >= 0x300:
> >   # pylint: disable=W0622
> >   basestring = str
> > @@ -1547,7 +1552,7 @@ def _parse_uri_map(cpv, metadata, use=None):
> >   myuris.pop()
> >   distfile = myuris.pop()
> >   else:
> > - distfile = os.path.basename(uri)
> > + distfile = urlunquote(os.path.basename(uri))
> >   if not distfile:
> >   raise portage.exception.InvalidDependString(
> >   ("getFetchMap(): '%s' SRC_URI has no 
> > file " + \
> > diff --git a/lib/portage/package/ebuild/fetch.py 
> > b/lib/portage/package/ebuild/fetch.py
> > index 28e7caf53..47c3ad28f 100644
> > --- a/lib/portage/package/ebuild/fetch.py
> > +++ b/lib/portage/package/ebuild/fetch.py
> > @@ -26,6 +26,11 @@ try:
> >  except ImportError:
> >   from urlparse import urlparse
> >
> > +try:
> > + from urllib.parse import quote as urlquote
> > +except ImportError:
> > + from urllib import quote as urlquote
> > +
> >  import portage
> >  portage.proxy.lazyimport.lazyimport(globals(),
> >   'portage.package.ebuild.config:check_config_instance,config',
> > @@ -351,7 +356,7 @@ _size_suffix_map = {
> >
> >  class FlatLayout(object):
> >   def get_path(self, filename):
> > - return filename
> > + return urlquote(filename)
> >
> >   def get_filenames(self, distdir):
> >   for dirpath, dirnames, filenames in os.walk(distdir,
> > @@ -382,7 +387,7 @@ class FilenameHashLayout(object):
> >   c = c // 4
> >   ret += fnhash[:c] + '/'
> >   fnhash = fnhash[c:]
> > - return ret + filename
> > + return ret + urlquote(filename)
> >
> >   def get_filenames(self, distdir):
> >   pattern = ''
> >
>
> We've also got these other basename calls in fetch.py:
>
> > diff --git a/lib/portage/package/ebuild/fetch.py 
> > b/lib/portage/package/ebuild/fetch.py
> > index 28e7caf53..56b375d58 100644
> > --- a/lib/portage/package/ebuild/fetch.py
> > +++ b/lib/portage/package/ebuild/fetch.py
> > @@ -730,9 +730,9 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
> > else:
> > for myuri in myuris:
> > if urlparse(myuri).scheme:
> > -   
> > file_uri_tuples.append((os.path.basename(myuri), myuri))
> > +   
> > file_uri_tuples.append((urlunquote(os.path.basename(myuri)), myuri))
> > else:
> > -   
> > file_uri_tuples.append((os.path.basename(myuri), None))
> > +   
> > file_uri_tuples.append((urlunquote(os.path.basename(myuri)), None))

I'm not sure how to reach this particular code path. In my testing,
the fetch() function gets passed an OrderedDict in myuris, and the
filenames have already been unquoted, so we don't want to do it again.

Any idea how this "else" block would ever be executed?



Re: [gentoo-portage-dev] [PATCH] Improve handling of percent-signs in SRC_URI

2020-05-31 Thread Mike Gilbert
On Sun, May 31, 2020 at 4:32 PM Zac Medico  wrote:
>
> On 5/31/20 1:24 PM, Ulrich Mueller wrote:
> >> On Sun, 31 May 2020, Zac Medico wrote:
> >
> >> We've also got these other basename calls in fetch.py:
> >
> >>> diff --git a/lib/portage/package/ebuild/fetch.py 
> >>> b/lib/portage/package/ebuild/fetch.py
> >>> index 28e7caf53..56b375d58 100644
> >>> --- a/lib/portage/package/ebuild/fetch.py
> >>> +++ b/lib/portage/package/ebuild/fetch.py
> >>> @@ -730,9 +730,9 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
> >>> else:
> >>> for myuri in myuris:
> >>> if urlparse(myuri).scheme:
> >>> -   
> >>> file_uri_tuples.append((os.path.basename(myuri), myuri))
> >>> +   
> >>> file_uri_tuples.append((urlunquote(os.path.basename(myuri)), myuri))
> >>> else:
> >>> -   
> >>> file_uri_tuples.append((os.path.basename(myuri), None))
> >>> +   
> >>> file_uri_tuples.append((urlunquote(os.path.basename(myuri)), None))
> >
> >> The case with no scheme is not a URI, so we need to decide whether
> >> or not to unquote, and we should make _parse_uri_map behavior
> >> consistent for this case.
> >
> > I think that this follows from PMS, section 8.2 [1]:
> >
> > | The following elements are recognised in at least one class of
> > | specification. All elements must be surrounded on both sides by
> > | whitespace, except at the start and end of the string.
> > |
> > | [...]
> > | - A URI, in the form proto://host/path. Permitted in SRC_URI and
> > | HOMEPAGE. In EAPIs listed in table 8.4 as supporting SRC_URI arrows,
> > | may optionally be followed by whitespace, then ->, then whitespace,
> > | then a simple filename when in SRC_URI. For SRC_URI behaviour, see
> > | section 8.2.10.
> > | - A flat filename. Permitted in SRC_URI.
> >
> > So if it isn't an URI then it is a "flat filename" which IMHO is to be
> > interpreted literally without any unquoting.
> >
> > Ulrich
> >
> > [1] https://projects.gentoo.org/pms/7/pms.html#x1-690008.2
> >
>
> This interpretation sounds good to me.

Agreed.



Re: [gentoo-portage-dev] [PATCH] Improve handling of percent-signs in SRC_URI

2020-05-31 Thread Zac Medico
On 5/31/20 1:24 PM, Ulrich Mueller wrote:
>> On Sun, 31 May 2020, Zac Medico wrote:
> 
>> We've also got these other basename calls in fetch.py:
> 
>>> diff --git a/lib/portage/package/ebuild/fetch.py 
>>> b/lib/portage/package/ebuild/fetch.py
>>> index 28e7caf53..56b375d58 100644
>>> --- a/lib/portage/package/ebuild/fetch.py
>>> +++ b/lib/portage/package/ebuild/fetch.py
>>> @@ -730,9 +730,9 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
>>> else:
>>> for myuri in myuris:
>>> if urlparse(myuri).scheme:
>>> -   
>>> file_uri_tuples.append((os.path.basename(myuri), myuri))
>>> +   
>>> file_uri_tuples.append((urlunquote(os.path.basename(myuri)), myuri))
>>> else:
>>> -   
>>> file_uri_tuples.append((os.path.basename(myuri), None))
>>> +   
>>> file_uri_tuples.append((urlunquote(os.path.basename(myuri)), None))
> 
>> The case with no scheme is not a URI, so we need to decide whether
>> or not to unquote, and we should make _parse_uri_map behavior
>> consistent for this case.
> 
> I think that this follows from PMS, section 8.2 [1]:
> 
> | The following elements are recognised in at least one class of
> | specification. All elements must be surrounded on both sides by
> | whitespace, except at the start and end of the string.
> | 
> | [...]
> | - A URI, in the form proto://host/path. Permitted in SRC_URI and
> | HOMEPAGE. In EAPIs listed in table 8.4 as supporting SRC_URI arrows,
> | may optionally be followed by whitespace, then ->, then whitespace,
> | then a simple filename when in SRC_URI. For SRC_URI behaviour, see
> | section 8.2.10.
> | - A flat filename. Permitted in SRC_URI.
> 
> So if it isn't an URI then it is a "flat filename" which IMHO is to be
> interpreted literally without any unquoting.
> 
> Ulrich
> 
> [1] https://projects.gentoo.org/pms/7/pms.html#x1-690008.2
> 

This interpretation sounds good to me.
-- 
Thanks,
Zac



signature.asc
Description: OpenPGP digital signature


Re: [gentoo-portage-dev] [PATCH] Improve handling of percent-signs in SRC_URI

2020-05-31 Thread Ulrich Mueller
> On Sun, 31 May 2020, Zac Medico wrote:

> We've also got these other basename calls in fetch.py:

>> diff --git a/lib/portage/package/ebuild/fetch.py 
>> b/lib/portage/package/ebuild/fetch.py
>> index 28e7caf53..56b375d58 100644
>> --- a/lib/portage/package/ebuild/fetch.py
>> +++ b/lib/portage/package/ebuild/fetch.py
>> @@ -730,9 +730,9 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
>> else:
>> for myuri in myuris:
>> if urlparse(myuri).scheme:
>> -   
>> file_uri_tuples.append((os.path.basename(myuri), myuri))
>> +   
>> file_uri_tuples.append((urlunquote(os.path.basename(myuri)), myuri))
>> else:
>> -   
>> file_uri_tuples.append((os.path.basename(myuri), None))
>> +   
>> file_uri_tuples.append((urlunquote(os.path.basename(myuri)), None))

> The case with no scheme is not a URI, so we need to decide whether
> or not to unquote, and we should make _parse_uri_map behavior
> consistent for this case.

I think that this follows from PMS, section 8.2 [1]:

| The following elements are recognised in at least one class of
| specification. All elements must be surrounded on both sides by
| whitespace, except at the start and end of the string.
| 
| [...]
| - A URI, in the form proto://host/path. Permitted in SRC_URI and
| HOMEPAGE. In EAPIs listed in table 8.4 as supporting SRC_URI arrows,
| may optionally be followed by whitespace, then ->, then whitespace,
| then a simple filename when in SRC_URI. For SRC_URI behaviour, see
| section 8.2.10.
| - A flat filename. Permitted in SRC_URI.

So if it isn't an URI then it is a "flat filename" which IMHO is to be
interpreted literally without any unquoting.

Ulrich

[1] https://projects.gentoo.org/pms/7/pms.html#x1-690008.2


signature.asc
Description: PGP signature


Re: [gentoo-portage-dev] [PATCH] Improve handling of percent-signs in SRC_URI

2020-05-31 Thread Zac Medico
On 5/31/20 12:51 PM, Mike Gilbert wrote:
> On Sun, May 31, 2020 at 3:36 PM Zac Medico  wrote:
>>
>> On 5/31/20 12:21 PM, Mike Gilbert wrote:
>>> On Sun, May 31, 2020 at 2:01 PM Zac Medico  wrote:

 On 5/31/20 6:17 AM, Mike Gilbert wrote:
> Unquote the URL basename when fetching from upstream.
> Quote the filename when fetching from mirrors.
>
> Bug: https://bugs.gentoo.org/719810
> Signed-off-by: Mike Gilbert 
> ---
>  lib/portage/dbapi/porttree.py   | 7 ++-
>  lib/portage/package/ebuild/fetch.py | 9 +++--
>  2 files changed, 13 insertions(+), 3 deletions(-)
>
> diff --git a/lib/portage/dbapi/porttree.py b/lib/portage/dbapi/porttree.py
> index 08af17bcd..984263039 100644
> --- a/lib/portage/dbapi/porttree.py
> +++ b/lib/portage/dbapi/porttree.py
> @@ -55,6 +55,11 @@ try:
>  except ImportError:
>   from urlparse import urlparse
>
> +try:
> + from urllib.parse import unquote as urlunquote
> +except ImportError:
> + from urllib import unquote as urlunquote
> +
>  if sys.hexversion >= 0x300:
>   # pylint: disable=W0622
>   basestring = str
> @@ -1547,7 +1552,7 @@ def _parse_uri_map(cpv, metadata, use=None):
>   myuris.pop()
>   distfile = myuris.pop()
>   else:
> - distfile = os.path.basename(uri)
> + distfile = urlunquote(os.path.basename(uri))
>   if not distfile:
>   raise portage.exception.InvalidDependString(
>   ("getFetchMap(): '%s' SRC_URI has 
> no file " + \
> diff --git a/lib/portage/package/ebuild/fetch.py 
> b/lib/portage/package/ebuild/fetch.py
> index 28e7caf53..47c3ad28f 100644
> --- a/lib/portage/package/ebuild/fetch.py
> +++ b/lib/portage/package/ebuild/fetch.py
> @@ -26,6 +26,11 @@ try:
>  except ImportError:
>   from urlparse import urlparse
>
> +try:
> + from urllib.parse import quote as urlquote
> +except ImportError:
> + from urllib import quote as urlquote
> +
>  import portage
>  portage.proxy.lazyimport.lazyimport(globals(),
>   'portage.package.ebuild.config:check_config_instance,config',
> @@ -351,7 +356,7 @@ _size_suffix_map = {
>
>  class FlatLayout(object):
>   def get_path(self, filename):
> - return filename
> + return urlquote(filename)
>
>   def get_filenames(self, distdir):
>   for dirpath, dirnames, filenames in os.walk(distdir,
> @@ -382,7 +387,7 @@ class FilenameHashLayout(object):
>   c = c // 4
>   ret += fnhash[:c] + '/'
>   fnhash = fnhash[c:]
> - return ret + filename
> + return ret + urlquote(filename)
>
>   def get_filenames(self, distdir):
>   pattern = ''
>

 We've also got these other basename calls in fetch.py:

> diff --git a/lib/portage/package/ebuild/fetch.py 
> b/lib/portage/package/ebuild/fetch.py
> index 28e7caf53..56b375d58 100644
> --- a/lib/portage/package/ebuild/fetch.py
> +++ b/lib/portage/package/ebuild/fetch.py
> @@ -730,9 +730,9 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
> else:
> for myuri in myuris:
> if urlparse(myuri).scheme:
> -   
> file_uri_tuples.append((os.path.basename(myuri), myuri))
> +   
> file_uri_tuples.append((urlunquote(os.path.basename(myuri)), myuri))
> else:
> -   
> file_uri_tuples.append((os.path.basename(myuri), None))
> +   
> file_uri_tuples.append((urlunquote(os.path.basename(myuri)), None))

 The case with no scheme is not a URI, so we need to decide whether
 or not to unquote, and we should make _parse_uri_map behavior
 consistent for this case.
>>>
>>> Backing up, I think unquoting the basename is really a separate issue
>>> from quoting the filename in Layout.get_path(). The latter fixes a
>>> known bug when fetching from mirrors, whereas the former seems like a
>>> cosmetic issue. I don't think we should mix these two up into the same
>>> commit.
>>>
>>> Could I get your approval to push my original patch (Escape
>>> percent-signs in filename when fetching from mirrors)?
>>
>> Separate patches are fine, but both patches really should be merged at
>> the same time since the quoting and unquoting are inherently coupled.
> 
> I think that they are not actually coupled at all. It's two sets of
> code used in different contexts.
> 
> My original patch could and should be 

Re: [gentoo-portage-dev] [PATCH] Improve handling of percent-signs in SRC_URI

2020-05-31 Thread Mike Gilbert
On Sun, May 31, 2020 at 3:36 PM Zac Medico  wrote:
>
> On 5/31/20 12:21 PM, Mike Gilbert wrote:
> > On Sun, May 31, 2020 at 2:01 PM Zac Medico  wrote:
> >>
> >> On 5/31/20 6:17 AM, Mike Gilbert wrote:
> >>> Unquote the URL basename when fetching from upstream.
> >>> Quote the filename when fetching from mirrors.
> >>>
> >>> Bug: https://bugs.gentoo.org/719810
> >>> Signed-off-by: Mike Gilbert 
> >>> ---
> >>>  lib/portage/dbapi/porttree.py   | 7 ++-
> >>>  lib/portage/package/ebuild/fetch.py | 9 +++--
> >>>  2 files changed, 13 insertions(+), 3 deletions(-)
> >>>
> >>> diff --git a/lib/portage/dbapi/porttree.py b/lib/portage/dbapi/porttree.py
> >>> index 08af17bcd..984263039 100644
> >>> --- a/lib/portage/dbapi/porttree.py
> >>> +++ b/lib/portage/dbapi/porttree.py
> >>> @@ -55,6 +55,11 @@ try:
> >>>  except ImportError:
> >>>   from urlparse import urlparse
> >>>
> >>> +try:
> >>> + from urllib.parse import unquote as urlunquote
> >>> +except ImportError:
> >>> + from urllib import unquote as urlunquote
> >>> +
> >>>  if sys.hexversion >= 0x300:
> >>>   # pylint: disable=W0622
> >>>   basestring = str
> >>> @@ -1547,7 +1552,7 @@ def _parse_uri_map(cpv, metadata, use=None):
> >>>   myuris.pop()
> >>>   distfile = myuris.pop()
> >>>   else:
> >>> - distfile = os.path.basename(uri)
> >>> + distfile = urlunquote(os.path.basename(uri))
> >>>   if not distfile:
> >>>   raise portage.exception.InvalidDependString(
> >>>   ("getFetchMap(): '%s' SRC_URI has 
> >>> no file " + \
> >>> diff --git a/lib/portage/package/ebuild/fetch.py 
> >>> b/lib/portage/package/ebuild/fetch.py
> >>> index 28e7caf53..47c3ad28f 100644
> >>> --- a/lib/portage/package/ebuild/fetch.py
> >>> +++ b/lib/portage/package/ebuild/fetch.py
> >>> @@ -26,6 +26,11 @@ try:
> >>>  except ImportError:
> >>>   from urlparse import urlparse
> >>>
> >>> +try:
> >>> + from urllib.parse import quote as urlquote
> >>> +except ImportError:
> >>> + from urllib import quote as urlquote
> >>> +
> >>>  import portage
> >>>  portage.proxy.lazyimport.lazyimport(globals(),
> >>>   'portage.package.ebuild.config:check_config_instance,config',
> >>> @@ -351,7 +356,7 @@ _size_suffix_map = {
> >>>
> >>>  class FlatLayout(object):
> >>>   def get_path(self, filename):
> >>> - return filename
> >>> + return urlquote(filename)
> >>>
> >>>   def get_filenames(self, distdir):
> >>>   for dirpath, dirnames, filenames in os.walk(distdir,
> >>> @@ -382,7 +387,7 @@ class FilenameHashLayout(object):
> >>>   c = c // 4
> >>>   ret += fnhash[:c] + '/'
> >>>   fnhash = fnhash[c:]
> >>> - return ret + filename
> >>> + return ret + urlquote(filename)
> >>>
> >>>   def get_filenames(self, distdir):
> >>>   pattern = ''
> >>>
> >>
> >> We've also got these other basename calls in fetch.py:
> >>
> >>> diff --git a/lib/portage/package/ebuild/fetch.py 
> >>> b/lib/portage/package/ebuild/fetch.py
> >>> index 28e7caf53..56b375d58 100644
> >>> --- a/lib/portage/package/ebuild/fetch.py
> >>> +++ b/lib/portage/package/ebuild/fetch.py
> >>> @@ -730,9 +730,9 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
> >>> else:
> >>> for myuri in myuris:
> >>> if urlparse(myuri).scheme:
> >>> -   
> >>> file_uri_tuples.append((os.path.basename(myuri), myuri))
> >>> +   
> >>> file_uri_tuples.append((urlunquote(os.path.basename(myuri)), myuri))
> >>> else:
> >>> -   
> >>> file_uri_tuples.append((os.path.basename(myuri), None))
> >>> +   
> >>> file_uri_tuples.append((urlunquote(os.path.basename(myuri)), None))
> >>
> >> The case with no scheme is not a URI, so we need to decide whether
> >> or not to unquote, and we should make _parse_uri_map behavior
> >> consistent for this case.
> >
> > Backing up, I think unquoting the basename is really a separate issue
> > from quoting the filename in Layout.get_path(). The latter fixes a
> > known bug when fetching from mirrors, whereas the former seems like a
> > cosmetic issue. I don't think we should mix these two up into the same
> > commit.
> >
> > Could I get your approval to push my original patch (Escape
> > percent-signs in filename when fetching from mirrors)?
>
> Separate patches are fine, but both patches really should be merged at
> the same time since the quoting and unquoting are inherently coupled.

I think that they are not actually coupled at all. It's two sets of
code used in different contexts.

My original patch could and should be pushed independently of any
subsequent changes.

The 

Re: [gentoo-portage-dev] [PATCH] Improve handling of percent-signs in SRC_URI

2020-05-31 Thread Zac Medico
On 5/31/20 12:21 PM, Mike Gilbert wrote:
> On Sun, May 31, 2020 at 2:01 PM Zac Medico  wrote:
>>
>> On 5/31/20 6:17 AM, Mike Gilbert wrote:
>>> Unquote the URL basename when fetching from upstream.
>>> Quote the filename when fetching from mirrors.
>>>
>>> Bug: https://bugs.gentoo.org/719810
>>> Signed-off-by: Mike Gilbert 
>>> ---
>>>  lib/portage/dbapi/porttree.py   | 7 ++-
>>>  lib/portage/package/ebuild/fetch.py | 9 +++--
>>>  2 files changed, 13 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/lib/portage/dbapi/porttree.py b/lib/portage/dbapi/porttree.py
>>> index 08af17bcd..984263039 100644
>>> --- a/lib/portage/dbapi/porttree.py
>>> +++ b/lib/portage/dbapi/porttree.py
>>> @@ -55,6 +55,11 @@ try:
>>>  except ImportError:
>>>   from urlparse import urlparse
>>>
>>> +try:
>>> + from urllib.parse import unquote as urlunquote
>>> +except ImportError:
>>> + from urllib import unquote as urlunquote
>>> +
>>>  if sys.hexversion >= 0x300:
>>>   # pylint: disable=W0622
>>>   basestring = str
>>> @@ -1547,7 +1552,7 @@ def _parse_uri_map(cpv, metadata, use=None):
>>>   myuris.pop()
>>>   distfile = myuris.pop()
>>>   else:
>>> - distfile = os.path.basename(uri)
>>> + distfile = urlunquote(os.path.basename(uri))
>>>   if not distfile:
>>>   raise portage.exception.InvalidDependString(
>>>   ("getFetchMap(): '%s' SRC_URI has no 
>>> file " + \
>>> diff --git a/lib/portage/package/ebuild/fetch.py 
>>> b/lib/portage/package/ebuild/fetch.py
>>> index 28e7caf53..47c3ad28f 100644
>>> --- a/lib/portage/package/ebuild/fetch.py
>>> +++ b/lib/portage/package/ebuild/fetch.py
>>> @@ -26,6 +26,11 @@ try:
>>>  except ImportError:
>>>   from urlparse import urlparse
>>>
>>> +try:
>>> + from urllib.parse import quote as urlquote
>>> +except ImportError:
>>> + from urllib import quote as urlquote
>>> +
>>>  import portage
>>>  portage.proxy.lazyimport.lazyimport(globals(),
>>>   'portage.package.ebuild.config:check_config_instance,config',
>>> @@ -351,7 +356,7 @@ _size_suffix_map = {
>>>
>>>  class FlatLayout(object):
>>>   def get_path(self, filename):
>>> - return filename
>>> + return urlquote(filename)
>>>
>>>   def get_filenames(self, distdir):
>>>   for dirpath, dirnames, filenames in os.walk(distdir,
>>> @@ -382,7 +387,7 @@ class FilenameHashLayout(object):
>>>   c = c // 4
>>>   ret += fnhash[:c] + '/'
>>>   fnhash = fnhash[c:]
>>> - return ret + filename
>>> + return ret + urlquote(filename)
>>>
>>>   def get_filenames(self, distdir):
>>>   pattern = ''
>>>
>>
>> We've also got these other basename calls in fetch.py:
>>
>>> diff --git a/lib/portage/package/ebuild/fetch.py 
>>> b/lib/portage/package/ebuild/fetch.py
>>> index 28e7caf53..56b375d58 100644
>>> --- a/lib/portage/package/ebuild/fetch.py
>>> +++ b/lib/portage/package/ebuild/fetch.py
>>> @@ -730,9 +730,9 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
>>> else:
>>> for myuri in myuris:
>>> if urlparse(myuri).scheme:
>>> -   
>>> file_uri_tuples.append((os.path.basename(myuri), myuri))
>>> +   
>>> file_uri_tuples.append((urlunquote(os.path.basename(myuri)), myuri))
>>> else:
>>> -   
>>> file_uri_tuples.append((os.path.basename(myuri), None))
>>> +   
>>> file_uri_tuples.append((urlunquote(os.path.basename(myuri)), None))
>>
>> The case with no scheme is not a URI, so we need to decide whether
>> or not to unquote, and we should make _parse_uri_map behavior
>> consistent for this case.
> 
> Backing up, I think unquoting the basename is really a separate issue
> from quoting the filename in Layout.get_path(). The latter fixes a
> known bug when fetching from mirrors, whereas the former seems like a
> cosmetic issue. I don't think we should mix these two up into the same
> commit.
> 
> Could I get your approval to push my original patch (Escape
> percent-signs in filename when fetching from mirrors)?

Separate patches are fine, but both patches really should be merged at
the same time since the quoting and unquoting are inherently coupled.
-- 
Thanks,
Zac



signature.asc
Description: OpenPGP digital signature


Re: [gentoo-portage-dev] [PATCH] Improve handling of percent-signs in SRC_URI

2020-05-31 Thread Mike Gilbert
On Sun, May 31, 2020 at 2:01 PM Zac Medico  wrote:
>
> On 5/31/20 6:17 AM, Mike Gilbert wrote:
> > Unquote the URL basename when fetching from upstream.
> > Quote the filename when fetching from mirrors.
> >
> > Bug: https://bugs.gentoo.org/719810
> > Signed-off-by: Mike Gilbert 
> > ---
> >  lib/portage/dbapi/porttree.py   | 7 ++-
> >  lib/portage/package/ebuild/fetch.py | 9 +++--
> >  2 files changed, 13 insertions(+), 3 deletions(-)
> >
> > diff --git a/lib/portage/dbapi/porttree.py b/lib/portage/dbapi/porttree.py
> > index 08af17bcd..984263039 100644
> > --- a/lib/portage/dbapi/porttree.py
> > +++ b/lib/portage/dbapi/porttree.py
> > @@ -55,6 +55,11 @@ try:
> >  except ImportError:
> >   from urlparse import urlparse
> >
> > +try:
> > + from urllib.parse import unquote as urlunquote
> > +except ImportError:
> > + from urllib import unquote as urlunquote
> > +
> >  if sys.hexversion >= 0x300:
> >   # pylint: disable=W0622
> >   basestring = str
> > @@ -1547,7 +1552,7 @@ def _parse_uri_map(cpv, metadata, use=None):
> >   myuris.pop()
> >   distfile = myuris.pop()
> >   else:
> > - distfile = os.path.basename(uri)
> > + distfile = urlunquote(os.path.basename(uri))
> >   if not distfile:
> >   raise portage.exception.InvalidDependString(
> >   ("getFetchMap(): '%s' SRC_URI has no 
> > file " + \
> > diff --git a/lib/portage/package/ebuild/fetch.py 
> > b/lib/portage/package/ebuild/fetch.py
> > index 28e7caf53..47c3ad28f 100644
> > --- a/lib/portage/package/ebuild/fetch.py
> > +++ b/lib/portage/package/ebuild/fetch.py
> > @@ -26,6 +26,11 @@ try:
> >  except ImportError:
> >   from urlparse import urlparse
> >
> > +try:
> > + from urllib.parse import quote as urlquote
> > +except ImportError:
> > + from urllib import quote as urlquote
> > +
> >  import portage
> >  portage.proxy.lazyimport.lazyimport(globals(),
> >   'portage.package.ebuild.config:check_config_instance,config',
> > @@ -351,7 +356,7 @@ _size_suffix_map = {
> >
> >  class FlatLayout(object):
> >   def get_path(self, filename):
> > - return filename
> > + return urlquote(filename)
> >
> >   def get_filenames(self, distdir):
> >   for dirpath, dirnames, filenames in os.walk(distdir,
> > @@ -382,7 +387,7 @@ class FilenameHashLayout(object):
> >   c = c // 4
> >   ret += fnhash[:c] + '/'
> >   fnhash = fnhash[c:]
> > - return ret + filename
> > + return ret + urlquote(filename)
> >
> >   def get_filenames(self, distdir):
> >   pattern = ''
> >
>
> We've also got these other basename calls in fetch.py:
>
> > diff --git a/lib/portage/package/ebuild/fetch.py 
> > b/lib/portage/package/ebuild/fetch.py
> > index 28e7caf53..56b375d58 100644
> > --- a/lib/portage/package/ebuild/fetch.py
> > +++ b/lib/portage/package/ebuild/fetch.py
> > @@ -730,9 +730,9 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
> > else:
> > for myuri in myuris:
> > if urlparse(myuri).scheme:
> > -   
> > file_uri_tuples.append((os.path.basename(myuri), myuri))
> > +   
> > file_uri_tuples.append((urlunquote(os.path.basename(myuri)), myuri))
> > else:
> > -   
> > file_uri_tuples.append((os.path.basename(myuri), None))
> > +   
> > file_uri_tuples.append((urlunquote(os.path.basename(myuri)), None))
>
> The case with no scheme is not a URI, so we need to decide whether
> or not to unquote, and we should make _parse_uri_map behavior
> consistent for this case.

Backing up, I think unquoting the basename is really a separate issue
from quoting the filename in Layout.get_path(). The latter fixes a
known bug when fetching from mirrors, whereas the former seems like a
cosmetic issue. I don't think we should mix these two up into the same
commit.

Could I get your approval to push my original patch (Escape
percent-signs in filename when fetching from mirrors)?



Re: [gentoo-portage-dev] [PATCH] Improve handling of percent-signs in SRC_URI

2020-05-31 Thread Zac Medico
On 5/31/20 6:17 AM, Mike Gilbert wrote:
> Unquote the URL basename when fetching from upstream.
> Quote the filename when fetching from mirrors.
> 
> Bug: https://bugs.gentoo.org/719810
> Signed-off-by: Mike Gilbert 
> ---
>  lib/portage/dbapi/porttree.py   | 7 ++-
>  lib/portage/package/ebuild/fetch.py | 9 +++--
>  2 files changed, 13 insertions(+), 3 deletions(-)
> 
> diff --git a/lib/portage/dbapi/porttree.py b/lib/portage/dbapi/porttree.py
> index 08af17bcd..984263039 100644
> --- a/lib/portage/dbapi/porttree.py
> +++ b/lib/portage/dbapi/porttree.py
> @@ -55,6 +55,11 @@ try:
>  except ImportError:
>   from urlparse import urlparse
>  
> +try:
> + from urllib.parse import unquote as urlunquote
> +except ImportError:
> + from urllib import unquote as urlunquote
> +
>  if sys.hexversion >= 0x300:
>   # pylint: disable=W0622
>   basestring = str
> @@ -1547,7 +1552,7 @@ def _parse_uri_map(cpv, metadata, use=None):
>   myuris.pop()
>   distfile = myuris.pop()
>   else:
> - distfile = os.path.basename(uri)
> + distfile = urlunquote(os.path.basename(uri))
>   if not distfile:
>   raise portage.exception.InvalidDependString(
>   ("getFetchMap(): '%s' SRC_URI has no 
> file " + \
> diff --git a/lib/portage/package/ebuild/fetch.py 
> b/lib/portage/package/ebuild/fetch.py
> index 28e7caf53..47c3ad28f 100644
> --- a/lib/portage/package/ebuild/fetch.py
> +++ b/lib/portage/package/ebuild/fetch.py
> @@ -26,6 +26,11 @@ try:
>  except ImportError:
>   from urlparse import urlparse
>  
> +try:
> + from urllib.parse import quote as urlquote
> +except ImportError:
> + from urllib import quote as urlquote
> +
>  import portage
>  portage.proxy.lazyimport.lazyimport(globals(),
>   'portage.package.ebuild.config:check_config_instance,config',
> @@ -351,7 +356,7 @@ _size_suffix_map = {
>  
>  class FlatLayout(object):
>   def get_path(self, filename):
> - return filename
> + return urlquote(filename)
>  
>   def get_filenames(self, distdir):
>   for dirpath, dirnames, filenames in os.walk(distdir,
> @@ -382,7 +387,7 @@ class FilenameHashLayout(object):
>   c = c // 4
>   ret += fnhash[:c] + '/'
>   fnhash = fnhash[c:]
> - return ret + filename
> + return ret + urlquote(filename)
>  
>   def get_filenames(self, distdir):
>   pattern = ''
> 

We've also got these other basename calls in fetch.py:

> diff --git a/lib/portage/package/ebuild/fetch.py 
> b/lib/portage/package/ebuild/fetch.py
> index 28e7caf53..56b375d58 100644
> --- a/lib/portage/package/ebuild/fetch.py
> +++ b/lib/portage/package/ebuild/fetch.py
> @@ -730,9 +730,9 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
> else:
> for myuri in myuris:
> if urlparse(myuri).scheme:
> -   
> file_uri_tuples.append((os.path.basename(myuri), myuri))
> +   
> file_uri_tuples.append((urlunquote(os.path.basename(myuri)), myuri))
> else:
> -   
> file_uri_tuples.append((os.path.basename(myuri), None))
> +   
> file_uri_tuples.append((urlunquote(os.path.basename(myuri)), None))

The case with no scheme is not a URI, so we need to decide whether
or not to unquote, and we should make _parse_uri_map behavior
consistent for this case.

>  
> filedict = OrderedDict()
> primaryuri_dict = {}
> @@ -1229,7 +1229,7 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
> "information about how 
> to\n!!! correctly specify "
> "FETCHCOMMAND and 
> RESUMECOMMAND.\n"),
> level=logging.ERROR, 
> noiselevel=-1)
> -   if myfile != os.path.basename(loc):
> +   if myfile != 
> urlunquote(os.path.basename(loc)):
> return 0
>  
> if not can_fetch:


-- 
Thanks,
Zac



signature.asc
Description: OpenPGP digital signature


[gentoo-portage-dev] [PATCH] Improve handling of percent-signs in SRC_URI

2020-05-31 Thread Mike Gilbert
Unquote the URL basename when fetching from upstream.
Quote the filename when fetching from mirrors.

Bug: https://bugs.gentoo.org/719810
Signed-off-by: Mike Gilbert 
---
 lib/portage/dbapi/porttree.py   | 7 ++-
 lib/portage/package/ebuild/fetch.py | 9 +++--
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/lib/portage/dbapi/porttree.py b/lib/portage/dbapi/porttree.py
index 08af17bcd..984263039 100644
--- a/lib/portage/dbapi/porttree.py
+++ b/lib/portage/dbapi/porttree.py
@@ -55,6 +55,11 @@ try:
 except ImportError:
from urlparse import urlparse
 
+try:
+   from urllib.parse import unquote as urlunquote
+except ImportError:
+   from urllib import unquote as urlunquote
+
 if sys.hexversion >= 0x300:
# pylint: disable=W0622
basestring = str
@@ -1547,7 +1552,7 @@ def _parse_uri_map(cpv, metadata, use=None):
myuris.pop()
distfile = myuris.pop()
else:
-   distfile = os.path.basename(uri)
+   distfile = urlunquote(os.path.basename(uri))
if not distfile:
raise portage.exception.InvalidDependString(
("getFetchMap(): '%s' SRC_URI has no 
file " + \
diff --git a/lib/portage/package/ebuild/fetch.py 
b/lib/portage/package/ebuild/fetch.py
index 28e7caf53..47c3ad28f 100644
--- a/lib/portage/package/ebuild/fetch.py
+++ b/lib/portage/package/ebuild/fetch.py
@@ -26,6 +26,11 @@ try:
 except ImportError:
from urlparse import urlparse
 
+try:
+   from urllib.parse import quote as urlquote
+except ImportError:
+   from urllib import quote as urlquote
+
 import portage
 portage.proxy.lazyimport.lazyimport(globals(),
'portage.package.ebuild.config:check_config_instance,config',
@@ -351,7 +356,7 @@ _size_suffix_map = {
 
 class FlatLayout(object):
def get_path(self, filename):
-   return filename
+   return urlquote(filename)
 
def get_filenames(self, distdir):
for dirpath, dirnames, filenames in os.walk(distdir,
@@ -382,7 +387,7 @@ class FilenameHashLayout(object):
c = c // 4
ret += fnhash[:c] + '/'
fnhash = fnhash[c:]
-   return ret + filename
+   return ret + urlquote(filename)
 
def get_filenames(self, distdir):
pattern = ''
-- 
2.27.0.rc2