Quote the URL only when needed: when it contains ' ' or a stray '%'. This avoids quoting ',' and '~' in otherwise clean URLs, in line with e.g. wget and Firefox.
The RFC says both options are equivalent, but apparently some servers don't bother unquoting at all. http://lists.baseurl.org/pipermail/yum-devel/2012-September/009618.html --- urlgrabber/grabber.py | 19 ++++++------------- 1 files changed, 6 insertions(+), 13 deletions(-) diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py index a33e017..2ba3603 100644 --- a/urlgrabber/grabber.py +++ b/urlgrabber/grabber.py @@ -479,6 +479,7 @@ BANDWIDTH THROTTLING +import re import os import sys import urlparse @@ -825,23 +826,15 @@ class URLParser: find ' ' -> 1 find '%' -> 1 find '%XX' -> 0 - else -> 1 + else -> 0 """ (scheme, host, path, parm, query, frag) = parts if ' ' in path: return 1 - ind = string.find(path, '%') - if ind > -1: - while ind > -1: - if len(path) < ind+3: - return 1 - code = path[ind+1:ind+3].upper() - if code[0] not in self.hexvals or \ - code[1] not in self.hexvals: - return 1 - ind = string.find(path, '%', ind+1) - return 0 - return 1 + for nn in re.findall('%([0-9a-fA-F]{0,2})', path): + if len(nn) != 2: + return 1 + return 0 class URLGrabberOptions: """Class to ease kwargs handling.""" -- 1.7.4.4 _______________________________________________ Yum-devel mailing list yum-devel@lists.baseurl.org http://lists.baseurl.org/mailman/listinfo/yum-devel
