On Tue, 15 Nov 2016 20:38:24 +0000, Jun Wu wrote: > # HG changeset patch > # User Jun Wu <qu...@fb.com> > # Date 1479241551 0 > # Tue Nov 15 20:25:51 2016 +0000 > # Node ID f3d2f4ebc4006043684db52e4487756dd4e2d238 > # Parent d1a0a64f6e16432333bea0476098c46a61222b9b > # Available At https://bitbucket.org/quark-zju/hg-draft > # hg pull https://bitbucket.org/quark-zju/hg-draft -r > f3d2f4ebc400 > util: improve iterfile so it chooses code path wisely
> +if (pyplatform.python_implementation() == 'CPython' and > + sys.version_info < (3, 0)): > + # There is an issue in CPython that some IO methods do not handle EINTR > + # correctly. The following table shows what CPython version (and > functions) > + # are affected (Y: has the EINTR bug, N: otherwise): > + # > + # | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0 > + # -------------------------------------------------- > + # fp.__iter__ | Y | Y | N > + # fp.read* | Y | N [1] | N > + # > + # [1]: fixed by hg changeset 67dc99a989cd. > + # > + # Here we workaround the EINTR issue for fileobj.__iter__. Other methods > + # like "read*" are ignored for now, as Python < 2.7.4 is a minority. > + # > + # Although we can workaround the EINTR issue for fp.__iter__, it is > slower: > + # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in > + # CPython 2, because the latter maintains an internal readahead buffer. Do you mean "the former"? It is "for x in fp" that maintains the internal readahead buffer. > + # On modern systems like Linux, the "read" syscall cannot be interrupted > + # when reading "fast" files like on-disk files. So the EINTR issue only > + # affects things like pipes, sockets, ttys etc. We treat "normal" > (S_ISREG) > + # files approximately as "fast" files and use the fast (unsafe) code > path, > + # to minimize the performance impact. > + if sys.version_info >= (2, 7, 4): > + # fp.readline deals with EINTR correctly, use it as a workaround > + def _safeiterfile(fp): > + return iter(fp.readline, '') > + else: > + # fp.read* are broken too, manually deal with EINTR in a stupid way > + # note: this may block longer than necessary because of bufsize. 
> + def _safeiterfile(fp, bufsize=4096): > + fd = fp.fileno() > + line = '' > + while True: > + try: > + buf = os.read(fd, bufsize) > + except OSError as ex: > + if ex.errno == errno.EINTR: > + continue > + else: > + raise > + line += buf > + if '\n' in buf: > + splitted = line.splitlines(True) > + line = '' > + for l in splitted: > + if l[-1] == '\n': > + yield l > + else: > + line = l > + if not buf: > + break This misses the last line if the file does not end with '\n': the partial line left in "line" is never yielded when the loop breaks at EOF. > + def iterfile(fp): > + fastpath = True > + if type(fp) is file: > + fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode) > + if fastpath: > + return fp > + else: > + return _safeiterfile(fp) > +else: > + # PyPy and CPython 3 do not have the EINTR issue thus no workaround > needed. > + def iterfile(fp): > + return fp _______________________________________________ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel