# HG changeset patch # User Gregory Szorc <gregory.sz...@gmail.com> # Date 1476648175 25200 # Sun Oct 16 13:02:55 2016 -0700 # Node ID ee7144d686de700d60cb2a78adac3aab43f2a71d # Parent 735adf139447190fe52c65ec6e9b4e5e0c81d6aa changegroup: reuse revlog file handle when generating group
Previously, every time we needed new data from a revlog during changegroup generation, a new file handle would be opened, seeked, read, and closed. After this patch, we use the just-added context manager on the revlog class to cache and reuse a file handle for the duration of the changegroup operation. When generating a v2 bundle on the mozilla-unified repo, this change causes the following changes to system call counts: Function Before After Delta read 576,062 576,085 +23 open 274,939 269,167 -5,772 fstat 808,762 797,216 -11,546 write 952,449 952,473 +24 lseek 536,450 536,452 +2 close 272,314 266,542 -5,772 lstat 20,185 20,185 0 It's worth noting that this repo has 265,773 revlog files (.i + .d) but only 941 of them are non-inline. That means of the non-inline revlogs, we're preventing multiple redundant opens per revlog on average. (Most of the prevented opens are on changelog and manifest.) On my Linux machine, this change appears to show no improvement in wall time. However, fewer system calls is fewer system calls. And when I/O is involved, I think aiming for 0 system calls is worthwhile. diff --git a/mercurial/changegroup.py b/mercurial/changegroup.py --- a/mercurial/changegroup.py +++ b/mercurial/changegroup.py @@ -560,23 +560,25 @@ class cg1packer(object): # add the parent of the first rev p = revlog.parentrevs(revs[0])[0] revs.insert(0, p) # build deltas total = len(revs) - 1 msgbundling = _('bundling') - for r in xrange(len(revs) - 1): - if units is not None: - self._progress(msgbundling, r + 1, unit=units, total=total) - prev, curr = revs[r], revs[r + 1] - linknode = lookup(revlog.node(curr)) - for c in self.revchunk(revlog, curr, prev, linknode): - yield c + + with revlog.cachefilehandle(): + for r in xrange(len(revs) - 1): + if units is not None: + self._progress(msgbundling, r + 1, unit=units, total=total) + prev, curr = revs[r], revs[r + 1] + linknode = lookup(revlog.node(curr)) + for c in self.revchunk(revlog, curr, prev, linknode): + yield c if units is not None: self._progress(msgbundling, None) yield self.close() # filter any nodes that claim to be part of the known set def prune(self, revlog, missing, commonrevs): rr, rl = revlog.rev, revlog.linkrev _______________________________________________ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel