Quote the URL only when needed: when its path contains ' ' or a stray '%'
(a '%' that is not followed by two hex digits).
This avoids quoting ',' and '~' in otherwise clean URLs, in
line with e.g. wget and Firefox.

The RFC says the quoted and unquoted forms are equivalent, but apparently
some servers don't bother unquoting at all.

http://lists.baseurl.org/pipermail/yum-devel/2012-September/009618.html
---
 urlgrabber/grabber.py |   19 ++++++-------------
 1 files changed, 6 insertions(+), 13 deletions(-)

diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py
index a33e017..2ba3603 100644
--- a/urlgrabber/grabber.py
+++ b/urlgrabber/grabber.py
@@ -479,6 +479,7 @@ BANDWIDTH THROTTLING
 
 
 
+import re
 import os
 import sys
 import urlparse
@@ -825,23 +826,15 @@ class URLParser:
         find ' '   ->  1
         find '%'   ->  1
         find '%XX' ->  0
-        else       ->  1
+        else       ->  0
         """
         (scheme, host, path, parm, query, frag) = parts
         if ' ' in path:
             return 1
-        ind = string.find(path, '%')
-        if ind > -1:
-            while ind > -1:
-                if len(path) < ind+3:
-                    return 1
-                code = path[ind+1:ind+3].upper()
-                if     code[0] not in self.hexvals or \
-                       code[1] not in self.hexvals:
-                    return 1
-                ind = string.find(path, '%', ind+1)
-            return 0
-        return 1
+        for nn in re.findall('%([0-9a-fA-F]{0,2})', path):
+            if len(nn) != 2:
+                return 1
+        return 0
     
 class URLGrabberOptions:
     """Class to ease kwargs handling."""
-- 
1.7.4.4

_______________________________________________
Yum-devel mailing list
[email protected]
http://lists.baseurl.org/mailman/listinfo/yum-devel

Reply via email to