---
 urlgrabber/__init__.py  |    2 +-
 urlgrabber/byterange.py |   81 ++++++++++++++++++++-----------------
 urlgrabber/grabber.py   |  101 ++++++++++++++++++++++++++++-------------------
 urlgrabber/mirror.py    |   14 +++++--
 urlgrabber/progress.py  |   13 +++++-
 5 files changed, 125 insertions(+), 86 deletions(-)
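
The recurring technique in the hunks below is a version-gated import shim: each module checks sys.version_info.major once and binds the Python 2 module names (urllib2, urlparse, cStringIO, mimetools) to neutral aliases (urllib_request, urllib_parse, urllib_error, StringIO) that the rest of the file uses, so the call sites stay identical on both interpreters. A minimal, standalone sketch of that pattern follows, using the same alias names as the patch; the split_host() helper at the end is only illustrative and not part of the patch:

    from __future__ import print_function
    import sys

    # Bind the Python 2 module names to the aliases the code actually calls.
    if sys.version_info.major < 3:
        import urllib2 as urllib_request       # urlopen, Request, handler classes
        import urlparse as urllib_parse        # urlparse, urlsplit, urlunparse
        import urllib2 as urllib_error         # URLError, HTTPError
        from urllib2 import quote, unquote
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
    else:
        import urllib.request as urllib_request
        import urllib.parse as urllib_parse
        import urllib.error as urllib_error
        from urllib.parse import quote, unquote
        from io import StringIO

    def split_host(url):
        # Call sites only ever touch the aliases, so they run unchanged on 2 and 3.
        return urllib_parse.urlsplit(url).netloc

    if __name__ == '__main__':
        print(split_host('http://linux.duke.edu/projects/urlgrabber/'))
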
diff --git a/urlgrabber/__init__.py b/urlgrabber/__init__.py index ddd5204..ed0e526 100644 --- a/urlgrabber/__init__.py +++ b/urlgrabber/__init__.py @@ -51,4 +51,4 @@ __author__ = 'Michael D. Stenner <[email protected]>, ' \ 'Seth Vidal <[email protected]>' __url__ = 'http://linux.duke.edu/projects/urlgrabber/' -from grabber import urlgrab, urlopen, urlread +from urlgrabber.grabber import urlgrab, urlopen, urlread diff --git a/urlgrabber/byterange.py b/urlgrabber/byterange.py index f7caec4..5461da5 100644 --- a/urlgrabber/byterange.py +++ b/urlgrabber/byterange.py @@ -20,22 +20,36 @@ import os import stat -import urllib -import urllib2 -import rfc822 +import sys DEBUG = None -try: - from cStringIO import StringIO -except ImportError: - from StringIO import StringIO +# python2+3 compatible imports +if sys.version_info.major < 3: + import urllib2 as urllib_request + import urlparse as urllib_parse + import urllib2 as urllib_error + from urllib2 import unquote + from mimetools import Message + from rfc822 import formatdate + try: + from cStringIO import StringIO + except ImportError: + from StringIO import StringIO +else: + import urllib.request as urllib_request + import urllib.parse as urllib_parse + import urllib.error as urllib_error + from urllib.parse import unquote + from email.message import Message + from email.utils import formatdate + from io import StringIO class RangeError(IOError): """Error raised when an unsatisfiable range is requested.""" pass -class HTTPRangeHandler(urllib2.BaseHandler): +class HTTPRangeHandler(urllib_request.BaseHandler): """Handler that enables HTTP Range headers. This was extremely simple. The Range header is a HTTP feature to @@ -61,7 +75,7 @@ class HTTPRangeHandler(urllib2.BaseHandler): def http_error_206(self, req, fp, code, msg, hdrs): # 206 Partial Content Response - r = urllib.addinfourl(fp, hdrs, req.get_full_url()) + r = urllib_request.addinfourl(fp, hdrs, req.get_full_url()) r.code = code r.msg = msg return r @@ -211,25 +225,23 @@ class RangeableFileObject: raise RangeError(9, 'Requested Range Not Satisfiable') pos+= bufsize -class FileRangeHandler(urllib2.FileHandler): +class FileRangeHandler(urllib_request.FileHandler): """FileHandler subclass that adds Range support. This class handles Range headers exactly like an HTTP server would. 
""" def open_local_file(self, req): - import mimetypes - import mimetools host = req.get_host() file = req.get_selector() - localfile = urllib.url2pathname(file) + localfile = urllib_request.url2pathname(file) stats = os.stat(localfile) size = stats[stat.ST_SIZE] - modified = rfc822.formatdate(stats[stat.ST_MTIME]) + modified = formatdate(stats[stat.ST_MTIME]) mtype = mimetypes.guess_type(file)[0] if host: - host, port = urllib.splitport(host) + host, port = urllib_parse.splitport(host) if port or socket.gethostbyname(host) not in self.get_names(): - raise urllib2.URLError('file not on local host') + raise urllib_error.URLError('file not on local host') fo = open(localfile,'rb') brange = req.headers.get('Range',None) brange = range_header_to_tuple(brange) @@ -241,10 +253,10 @@ class FileRangeHandler(urllib2.FileHandler): raise RangeError(9, 'Requested Range Not Satisfiable') size = (lb - fb) fo = RangeableFileObject(fo, (fb,lb)) - headers = mimetools.Message(StringIO( + headers = Message(StringIO( 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' % (mtype or 'text/plain', size, modified))) - return urllib.addinfourl(fo, headers, 'file:'+file) + return urllib_request.addinfourl(fo, headers, 'file:'+file) # FTP Range Support @@ -254,29 +266,25 @@ class FileRangeHandler(urllib2.FileHandler): # follows: # -- range support modifications start/end here -from urllib import splitport, splituser, splitpasswd, splitattr, \ - unquote, addclosehook, addinfourl import ftplib import socket -import sys import mimetypes -import mimetools -class FTPRangeHandler(urllib2.FTPHandler): +class FTPRangeHandler(urllib_request.FTPHandler): def ftp_open(self, req): host = req.get_host() if not host: raise IOError('ftp error', 'no host given') - host, port = splitport(host) + host, port = urllib_parse.splitport(host) if port is None: port = ftplib.FTP_PORT else: port = int(port) # username/password handling - user, host = splituser(host) + user, host = urllib_parse.splituser(host) if user: - user, passwd = splitpasswd(user) + user, passwd = urllib_parse.splitpasswd(user) else: passwd = None host = unquote(host) @@ -286,18 +294,17 @@ class FTPRangeHandler(urllib2.FTPHandler): try: host = socket.gethostbyname(host) except socket.error as msg: - raise urllib2.URLError(msg) - path, attrs = splitattr(req.get_selector()) - dirs = path.split('/') - dirs = map(unquote, dirs) - dirs, file = dirs[:-1], dirs[-1] + raise urllib_error.URLError(msg) + path, attrs = urllib_parse.splitattr(req.get_selector()) + dirs = [unquote(i) for i in path.split('/')] + file = dirs.pop() if dirs and not dirs[0]: - dirs = dirs[1:] + del dirs[0] try: fw = self.connect_ftp(user, passwd, host, port, dirs) type = file and 'I' or 'D' for attr in attrs: - attr, value = splitattr(attr) + attr, value = urllib_parse.splitattr(attr) if attr.lower() == 'type' and \ value in ('a', 'A', 'i', 'I', 'd', 'D'): type = value.upper() @@ -336,8 +343,8 @@ class FTPRangeHandler(urllib2.FTPHandler): if retrlen is not None and retrlen >= 0: headers += "Content-Length: %d\n" % retrlen sf = StringIO(headers) - headers = mimetools.Message(sf) - return addinfourl(fp, headers, req.get_full_url()) + headers = Message(sf) + return urllib_request.addinfourl(fp, headers, req.get_full_url()) except ftplib.all_errors as msg: raise IOError('ftp error', msg) @@ -345,7 +352,7 @@ class FTPRangeHandler(urllib2.FTPHandler): fw = ftpwrapper(user, passwd, host, port, dirs) return fw -class ftpwrapper(urllib.ftpwrapper): +class ftpwrapper(urllib_request.ftpwrapper): # range 
support note: # this ftpwrapper code is copied directly from # urllib. The only enhancement is to add the rest @@ -389,7 +396,7 @@ class ftpwrapper(urllib.ftpwrapper): conn = self.ftp.ntransfercmd(cmd) self.busy = 1 # Pass back both a suitably decorated object and a retrieval length - return (addclosehook(conn[0].makefile('rb'), + return (urllib_request.addclosehook(conn[0].makefile('rb'), self.endtransfer), conn[1]) diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py index 465acd2..72d654d 100644 --- a/urlgrabber/grabber.py +++ b/urlgrabber/grabber.py @@ -467,22 +467,42 @@ from __future__ import print_function import os import sys -import urlparse import time import string -import urllib -import urllib2 -from httplib import responses -import mimetools -import thread import types import stat import pycurl from ftplib import parse150 -from StringIO import StringIO -from httplib import HTTPException import socket, select, fcntl -from byterange import range_tuple_normalize, range_tuple_to_header, RangeError +from urlgrabber.byterange import range_tuple_normalize, range_tuple_to_header, RangeError + +# python2+3 compatible imports +if sys.version_info.major < 3: + import urllib2 as urllib_request + import urlparse as urllib_parse + import urllib2 as urllib_error + from urllib2 import quote, unquote + from httplib import responses, HTTPException + from mimetools import Message + try: + from cStringIO import StringIO + except ImportError: + from StringIO import StringIO + from types import StringTypes +else: + import urllib.request as urllib_request + import urllib.parse as urllib_parse + import urllib.error as urllib_error + from urllib.parse import quote, unquote + from http.client import responses, HTTPException + from email.message import Message + from io import StringIO + import collections + def callable(cb): + return isinstance(cb, collections.Callable) + StringTypes = bytes, str + unicode = str + long = int try: import xattr @@ -751,14 +771,14 @@ class URLParser: if opts.prefix: url = self.add_prefix(url, opts.prefix) - parts = urlparse.urlparse(url) + parts = urllib_parse.urlparse(url) (scheme, host, path, parm, query, frag) = parts if not scheme or (len(scheme) == 1 and scheme in string.letters): # if a scheme isn't specified, we guess that it's "file:" if url[0] not in '/\\': url = os.path.abspath(url) - url = 'file:' + urllib.pathname2url(url) - parts = urlparse.urlparse(url) + url = 'file:' + urllib_request.pathname2url(url) + parts = urllib_parse.urlparse(url) quote = 0 # pathname2url quotes, so we won't do it again if scheme in ['http', 'https']: @@ -769,7 +789,7 @@ class URLParser: if quote: parts = self.quote(parts) - url = urlparse.urlunparse(parts) + url = urllib_parse.urlunparse(parts) return url, parts def add_prefix(self, url, prefix): @@ -793,7 +813,7 @@ class URLParser: passing into urlgrabber. 
""" (scheme, host, path, parm, query, frag) = parts - path = urllib.quote(path) + path = quote(path) return (scheme, host, path, parm, query, frag) hexvals = '0123456789ABCDEF' @@ -897,7 +917,7 @@ class URLGrabberOptions: def _set_attributes(self, **kwargs): """Update object attributes with those provided in kwargs.""" self.__dict__.update(kwargs) - if kwargs.has_key('range'): + if 'range' in kwargs: # normalize the supplied range value self.range = range_tuple_normalize(self.range) if not self.reget in [None, 'simple', 'check_timestamp']: @@ -963,7 +983,7 @@ class URLGrabberOptions: return self.format() def format(self, indent=' '): - keys = self.__dict__.keys() + keys = list(self.__dict__.keys()) if self.delegate is not None: keys.remove('delegate') keys.sort() @@ -1018,7 +1038,7 @@ class URLGrabber(object): if DEBUG: DEBUG.info('attempt %i/%s: %s', tries, opts.retry, args[0]) try: - r = apply(func, (opts,) + args, {}) + r = func(*(opts,) + args) if DEBUG: DEBUG.info('success') return r except URLGrabError as e: @@ -1076,14 +1096,14 @@ class URLGrabber(object): (scheme, host, path, parm, query, frag) = parts opts.find_proxy(url, scheme) if filename is None: - filename = os.path.basename( urllib.unquote(path) ) + filename = os.path.basename(unquote(path)) if not filename: # This is better than nothing. filename = 'index.html' if scheme == 'file' and not opts.copy_local: # just return the name of the local file - don't make a # copy currently - path = urllib.url2pathname(path) + path = urllib_request.url2pathname(path) if host: path = os.path.normpath('//' + host + path) if not os.path.exists(path): @@ -1190,7 +1210,7 @@ class PyCurlFileObject(object): self._hdr_dump = '' self._parsed_hdr = None self.url = url - self.scheme = urlparse.urlsplit(self.url)[0] + self.scheme = urllib_parse.urlsplit(self.url)[0] self.filename = filename self.append = False self.reget_time = None @@ -1226,7 +1246,7 @@ class PyCurlFileObject(object): if self.opts.progress_obj: size = self.size + self._reget_length self.opts.progress_obj.start(self._prog_reportname, - urllib.unquote(self.url), + unquote(self.url), self._prog_basename, size=size, text=self.opts.text) @@ -1268,7 +1288,7 @@ class PyCurlFileObject(object): if buf.lower().find('location') != -1: location = ':'.join(buf.split(':')[1:]) location = location.strip() - self.scheme = urlparse.urlsplit(location)[0] + self.scheme = urllib_parse.urlsplit(location)[0] self.url = location if len(self._hdr_dump) != 0 and buf == '\r\n': @@ -1287,7 +1307,7 @@ class PyCurlFileObject(object): hdrfp = StringIO() hdrfp.write(self._hdr_dump[statusend:]) hdrfp.seek(0) - self._parsed_hdr = mimetools.Message(hdrfp) + self._parsed_hdr = Message(hdrfp) return self._parsed_hdr hdr = property(_return_hdr_obj) @@ -1416,7 +1436,7 @@ class PyCurlFileObject(object): code = self.http_code errcode = e.args[0] - errurl = urllib.unquote(self.url) + errurl = unquote(self.url) if self._error[0]: errcode = self._error[0] @@ -1548,7 +1568,7 @@ class PyCurlFileObject(object): if self._error[1]: msg = self._error[1] err = URLGrabError(14, msg) - err.url = urllib.unquote(self.url) + err.url = unquote(self.url) raise err def _do_open(self): @@ -1565,7 +1585,7 @@ class PyCurlFileObject(object): def _build_range(self): reget_length = 0 rt = None - if self.opts.reget and type(self.filename) in types.StringTypes: + if self.opts.reget and type(self.filename) in StringTypes: # we have reget turned on and we're dumping to a file try: s = os.stat(self.filename) @@ -1624,7 +1644,7 @@ class 
PyCurlFileObject(object): err = URLGrabError(9, _('%s on %s') % (e, self.url)) err.url = self.url raise err - except urllib2.HTTPError as e: + except urllib_error.HTTPError as e: new_e = URLGrabError(14, _('%s on %s') % (e, self.url)) new_e.code = e.code new_e.exception = e @@ -1660,7 +1680,7 @@ class PyCurlFileObject(object): if self._complete: return _was_filename = False - if type(self.filename) in types.StringTypes and self.filename: + if type(self.filename) in StringTypes and self.filename: _was_filename = True self._prog_reportname = str(self.filename) self._prog_basename = os.path.basename(self.filename) @@ -2047,7 +2067,7 @@ class _ExternalDownloader: for line in lines: # parse downloader output line = line.split(' ', 5) - _id, size = map(int, line[:2]) + _id, size = int(line[0]), int(line[1]) if len(line) == 2: self.running[_id].progress_obj.update(size) continue @@ -2075,7 +2095,7 @@ class _ExternalDownloaderPool: self.cache = {} def start(self, opts): - host = urlparse.urlsplit(opts.url).netloc + host = urllib_parse.urlsplit(opts.url).netloc dl = self.cache.pop(host, None) if not dl: dl = _ExternalDownloader() @@ -2095,7 +2115,7 @@ class _ExternalDownloaderPool: ret.extend(done) # dl finished, move it to the cache - host = urlparse.urlsplit(done[0][0].url).netloc + host = urllib_parse.urlsplit(done[0][0].url).netloc if host in self.cache: self.cache[host].abort() self.epoll.unregister(fd) self.cache[host] = self.running.pop(fd) @@ -2126,7 +2146,7 @@ def parallel_wait(meter = 'text'): count += 1 total += opts.size if meter == 'text': - from progress import TextMultiFileMeter + from urlgrabber.progress import TextMultiFileMeter meter = TextMultiFileMeter() meter.start(count, total) @@ -2306,7 +2326,7 @@ class _TH: # Use hostname from URL. If it's a file:// URL, use baseurl. # If no baseurl, do not update timedhosts. - host = urlparse.urlsplit(url).netloc.split('@')[-1] or baseurl + host = urllib_parse.urlsplit(url).netloc.split('@')[-1] or baseurl if not host: return speed, fail, ts = _TH.hosts.get(host) or (0, 0, 0) @@ -2334,7 +2354,7 @@ class _TH: _TH.load() # Use just the hostname, unless it's a file:// baseurl. 
- host = urlparse.urlsplit(baseurl).netloc.split('@')[-1] or baseurl + host = urllib_parse.urlsplit(baseurl).netloc.split('@')[-1] or baseurl default_speed = default_grabber.opts.default_speed try: speed, fail, ts = _TH.hosts[host] @@ -2365,11 +2385,11 @@ def _main_test(): print("throttle: %s, throttle bandwidth: %s B/s" % (default_grabber.throttle, default_grabber.bandwidth)) - try: from progress import text_progress_meter + try: from urlgrabber.progress import text_progress_meter except ImportError: pass else: kwargs['progress_obj'] = text_progress_meter() - try: name = apply(urlgrab, (url, filename), kwargs) + try: name = urlgrab(url, filename, **kwargs) except URLGrabError as e: print(e) else: print('LOCAL FILE:', name) @@ -2386,7 +2406,7 @@ def _retry_test(): k, v = string.split(a, '=', 1) kwargs[k] = int(v) - try: from progress import text_progress_meter + try: from urlgrabber.progress import text_progress_meter except ImportError: pass else: kwargs['progress_obj'] = text_progress_meter() @@ -2404,12 +2424,11 @@ def _retry_test(): return kwargs['checkfunc'] = (cfunc, ('hello',), {'there':'there'}) - try: name = apply(retrygrab, (url, filename), kwargs) + try: name = retrygrab(url, filename, **kwargs) except URLGrabError as e: print(e) else: print('LOCAL FILE:', name) def _file_object_test(filename=None): - import cStringIO if filename is None: filename = __file__ print('using file "%s" for comparisons' % filename) @@ -2421,8 +2440,8 @@ def _file_object_test(filename=None): _test_file_object_readall, _test_file_object_readline, _test_file_object_readlines]: - fo_input = cStringIO.StringIO(s_input) - fo_output = cStringIO.StringIO() + fo_input = StringIO(s_input) + fo_output = StringIO() wrapper = PyCurlFileObject(fo_input, None, 0) print('testing %-30s ' % testfunc.__name__, end='') testfunc(wrapper, fo_output) diff --git a/urlgrabber/mirror.py b/urlgrabber/mirror.py index 75d9cae..bd76603 100644 --- a/urlgrabber/mirror.py +++ b/urlgrabber/mirror.py @@ -91,10 +91,16 @@ CUSTOMIZATION import random -import thread # needed for locking to make this threadsafe +import sys +if sys.version_info.major < 3: + import thread # needed for locking to make this threadsafe + from types import StringTypes +else: + import _thread as thread + StringTypes = bytes, str -from grabber import URLGrabError, CallbackObject, DEBUG, _to_utf8 -from grabber import _run_callback, _do_raise +from urlgrabber.grabber import URLGrabError, CallbackObject, DEBUG, _to_utf8 +from urlgrabber.grabber import _run_callback, _do_raise def _(st): return st @@ -268,7 +274,7 @@ class MirrorGroup: def _parse_mirrors(self, mirrors): parsed_mirrors = [] for m in mirrors: - if isinstance(m, basestring): + if type(m) in StringTypes: m = {'mirror': _to_utf8(m)} parsed_mirrors.append(m) return parsed_mirrors diff --git a/urlgrabber/progress.py b/urlgrabber/progress.py index 0d4d7bb..9888bef 100644 --- a/urlgrabber/progress.py +++ b/urlgrabber/progress.py @@ -21,11 +21,18 @@ import sys import time import math -import thread import fcntl import struct import termios +if sys.version_info.major < 3: + import thread + from types import StringTypes +else: + import _thread as thread + StringTypes = bytes, str + long = int + # Code from http://mail.python.org/pipermail/python-list/2000-May/033365.html def terminal_width(fd=1): """ Get the real terminal width """ @@ -582,7 +589,7 @@ class TextMultiFileMeter(MultiFileMeter): try: format = "%-30.30s %6.6s %s" fn = meter.basename - if type(message) in (type(''), type(u'')): + if type(message) in 
StringTypes: message = message.splitlines() if not message: message = [''] out = '%-79s' % (format % (fn, 'FAILED', message[0] or '')) @@ -748,7 +755,7 @@ def format_number(number, SI=0, space=' '): depth = depth + 1 number = number / step - if type(number) == type(1) or type(number) == type(1L): + if type(number) in (int, long): # it's an int or a long, which means it didn't get divided, # which means it's already short enough format = '%i%s%s' -- 1.7.4.4
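
Beyond the import shim, the grabber.py, mirror.py and progress.py hunks replace Python-2-only idioms with forms that run on either interpreter: apply(f, args) becomes f(*args), kwargs.has_key('range') becomes 'range' in kwargs, the low-level thread module is imported as _thread, and types.StringTypes, long, unicode and callable() are re-created on Python 3 where they no longer exist. A small self-contained sketch of those shims; the describe() and call() helpers are illustrative, not part of the patch:

    from __future__ import print_function
    import sys

    if sys.version_info.major < 3:
        import thread                          # low-level lock module on Python 2
        from types import StringTypes          # (str, unicode)
    else:
        import _thread as thread               # renamed on Python 3
        StringTypes = (bytes, str)
        long = int                             # int/long were unified
        unicode = str
        # callable() itself was removed in 3.0 and restored in 3.2; the patch
        # re-creates it from collections.Callable for the versions in between.

    _lock = thread.allocate_lock()             # mirror.py/progress.py lock this way

    def describe(value):
        if type(value) in StringTypes:
            return 'string-like'
        if type(value) in (int, long):
            return 'integer-like'
        return type(value).__name__

    def call(func, args=(), kwargs=None):
        # apply(func, args, kwargs) is gone in Python 3; star-unpacking works in both.
        return func(*args, **(kwargs or {}))

    if __name__ == '__main__':
        opts = {'range': (0, 1024)}
        print('range' in opts)                 # replaces opts.has_key('range')
        print(describe(u'file.rpm'), describe(long(10)), call(max, (1, 2, 3)))
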
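Several hunks also stop assuming that map() returns a list: on Python 3 it returns a lazy iterator, so results that get indexed, popped or length-checked are now built eagerly (a list comprehension for the FTP path components, explicit int() calls for the external-downloader status line). A tiny sketch of both replacement forms; the sample status line and path are hypothetical:

    from __future__ import print_function
    try:
        from urllib.parse import unquote        # Python 3
    except ImportError:
        from urllib2 import unquote             # Python 2

    # Downloader status line: explicit conversions instead of map(int, line[:2]).
    line = '7 123456 OK'.split(' ', 5)           # hypothetical "<id> <size> ..." output
    _id, size = int(line[0]), int(line[1])

    # FTP selector path: a list comprehension yields a real list that pop() can use.
    path = '/pub/fedora%2Breleases/x86_64/repodata'
    dirs = [unquote(p) for p in path.split('/')]
    file = dirs.pop()
    if dirs and not dirs[0]:
        del dirs[0]                              # drop the empty leading component
    print(_id, size, dirs, file)
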
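One caveat worth noting: mimetools.Message(fp) parsed headers straight from a file object, but email.message.Message() takes no file argument on Python 3, so the Message(StringIO(...)) calls in the byterange.py hunks above do not actually parse the header text there. A portable way to build an equivalent header object on both interpreters is the email parser helpers; this is a sketch of that alternative, not what the patch itself does:

    from __future__ import print_function
    from email import message_from_string       # available on Python 2 and 3

    raw = ('Content-Type: text/plain\n'
           'Content-Length: 2048\n'
           'Last-modified: Mon, 01 Jan 2024 00:00:00 GMT\n')

    headers = message_from_string(raw)           # email.message.Message instance
    print(headers.get('Content-Type'))           # -> text/plain
    print(int(headers['Content-Length']))        # -> 2048
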
