On Fri, 2011-10-21 at 16:28 +0200, Zdeněk Pavlas wrote:
> When executed with a single argument 'DOWNLOADER', grabber.py
> parses download requests on stdin, and reports the results to stdout.
> ---
> urlgrabber/grabber.py | 57 ++++++++++++++++++++++++++++++++++++++++++++++++-
> 1 files changed, 56 insertions(+), 1 deletions(-)
Ok, so this is the contained external downloader ...
> diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py
> index b64c943..6d75c31 100644
> --- a/urlgrabber/grabber.py
> +++ b/urlgrabber/grabber.py
> @@ -455,7 +455,7 @@ import pycurl
> from ftplib import parse150
> from StringIO import StringIO
> from httplib import HTTPException
> -import socket
> +import socket, select
> from byterange import range_tuple_normalize, range_tuple_to_header, RangeError
>
> try:
> @@ -1899,6 +1899,58 @@ class _DirectDownloader:
> fo._do_close_fo()
> os.unlink(fo.opts.filename)
>
> +class _ProxyProgress:
> + def start(*d1, **d2): pass
> + def update(self, _amount_read):
> + os.write(1, '%d %d\n' % (self._id, _amount_read))
Don't you need "end" here too?
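Something like this, maybe (untested sketch; assuming the progress-object
protocol here is the usual start/update/end one from the rest of grabber.py):

    class _ProxyProgress:
        # pycurl drives these callbacks; relay byte counts to the
        # parent as '<id> <amount_read>' lines on stdout (fd 1).
        def start(self, *d1, **d2):
            pass
        def update(self, _amount_read):
            os.write(1, '%d %d\n' % (self._id, _amount_read))
        def end(self, _amount_read):
            # final callback, so the parent always sees the total
            self.update(_amount_read)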
> +import simplejson
Is depending on simplejson really necessary here ... how big is the import cost?
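If it has to stay, falling back to the stdlib would at least cap the cost
(sketch; assumes Python 2.6+, where json is simplejson merged into the stdlib):

    try:
        import json                  # stdlib since Python 2.6
    except ImportError:
        import simplejson as json    # same code, for older interpreters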
> +def download_process():
> + ''' Download process
> + - watch stdin for new requests, parse & issue em.
> + - use ProxyProgress to send _amount_read during dl.
> + - abort on EOF.
> + '''
> + dl = _DirectDownloader()
> + cnt = tout = 0
> + while True:
> + fdset = dl.multi.fdset()
> + fdset[0].append(0)
> + if 0 in select.select(*(fdset + (tout,)))[0]:
Again, select.poll() code is going to be 666 times easier to read.
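E.g. roughly this, dropped into the loop above (untested sketch; reuses the
patch's dl/tout/buf locals, and note poll() takes milliseconds, not seconds):

    poller = select.poll()
    poller.register(0, select.POLLIN)        # fd 0: new download requests
    rl, wl, _ = dl.multi.fdset()
    for fd in set(rl + wl):
        mask = (select.POLLIN if fd in rl else 0) \
             | (select.POLLOUT if fd in wl else 0)
        poller.register(fd, mask)
    events = poller.poll(int(tout * 1000))   # milliseconds
    if any(fd == 0 and ev & select.POLLIN for fd, ev in events):
        buf = os.read(0, 4096)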
> + buf = os.read(0, 4096)
> + if not buf: break # EOF
> + while buf:
> + try: line, buf = buf.split('\n', 1)
> + except ValueError:
> + buf += os.read(0, 4096)
> + continue
This is basically a blocking readline() call, which we can probably
live with (although it can suck). But at least put it behind some
method.
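i.e. roughly (sketch; _readline is a hypothetical helper, blocking on purpose
since a half-received request line means more bytes are on the way):

    def _readline(buf):
        # Block until a full '\n'-terminated request line is buffered
        # on fd 0; returns (line, rest), or (None, buf) at EOF mid-line.
        while '\n' not in buf:
            chunk = os.read(0, 4096)
            if not chunk:
                return None, buf
            buf += chunk
        return buf.split('\n', 1)

Then the loop body is just "line, buf = _readline(buf)", bailing out when
line comes back None.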
> + # start new download
> + cnt += 1
> + opts = URLGrabberOptions()
> + opts._id = cnt
> + opts.progress_obj = _ProxyProgress()
> + opts.progress_obj._id = cnt
> + for k in line.split(' '):
> + k, v = k.split('=', 1)
> + v = urllib.unquote(v)
> + v = simplejson.loads(v)
> + setattr(opts, k, v)
> + dl.start(opts)
> +
> + # XXX: likely a CurlMulti() bug
> + # fdset() is empty shortly after starting new request.
> + # Do some polling to work this around.
> + tout = 10e-3
Shocker, workarounds for CurlMulti weirdness.
> + # perform requests
> + for opts, ug_err, _amount_read in dl.perform():
> + ug_err = ug_err and '%d %s' % ug_err.args or 'OK'
> + os.write(1, '%d %d %s\n' % (opts._id, _amount_read, ug_err))
> + tout = min(tout * 1.1, 5)
> + dl.abort()
> + sys.exit(0)
> +
>
> #####################################################################
> # High level async API
> @@ -2122,6 +2174,9 @@ def _test_file_object_readlines(wrapper, fo_output):
> fo_output.write(string.join(li, ''))
>
> if __name__ == '__main__':
> + if sys.argv[1:] == ['DOWNLOADER']:
> + download_process()
Is it a big benefit to use __file__ instead of creating something in
libexec/whatever?
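The spawn side looks about the same either way (sketch; the libexec path
below is made up):

    import subprocess, sys

    # via __file__: nothing extra to package, but the child is tied to
    # whichever grabber.py the parent happened to import
    p = subprocess.Popen([sys.executable, __file__, 'DOWNLOADER'],
                         stdin=subprocess.PIPE, stdout=subprocess.PIPE)

    # via libexec: a stable, packaged entry point instead
    # p = subprocess.Popen(['/usr/libexec/urlgrabber-downloader'], ...)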
> _main_test()
> _retry_test()
> _file_object_test('test')