# HG changeset patch # User Yuya Nishihara <y...@tcha.org> # Date 1520855110 -32400 # Mon Mar 12 20:45:10 2018 +0900 # Node ID 2d8154d8b27e0452aee52f45d48370c7ca9f837b # Parent 66e64681e0a8ccbf3576d99ceb51f7259e352689 annotate: do not construct attr.s object per line while computing history
Unfortunately, good abstraction has a cost. It's way slower to construct an annotateline() object than creating a plain tuple or a list. This patch changes the internal data structure from row-based to columnar, so the decorate() function can be instant (i.e. no Python in hot loop.) For code readability, the outermost tuple is switched to an attr.s object instead. (original, row-based attr.s) $ hg annot mercurial/commands.py --time > /dev/null time: real 11.470 secs (user 11.400+0.000 sys 0.070+0.000) $ hg annot mercurial/commands.py --time --line-number > /dev/null time: real 39.590 secs (user 39.500+0.000 sys 0.080+0.000) (this patch, columnar) $ hg annot mercurial/commands.py --time > /dev/null time: real 11.780 secs (user 11.710+0.000 sys 0.070+0.000) $ hg annot mercurial/commands.py --time --line-number > /dev/null time: real 12.240 secs (user 12.170+0.000 sys 0.090+0.000) (cf. 4.3.3, row-based tuple) $ hg annot mercurial/commands.py --time --line-number > /dev/null time: real 19.540 secs (user 19.460+0.000 sys 0.080+0.000) diff --git a/mercurial/dagop.py b/mercurial/dagop.py --- a/mercurial/dagop.py +++ b/mercurial/dagop.py @@ -369,6 +369,15 @@ class annotateline(object): # Whether this annotation was the result of a skip-annotate. skip = attr.ib(default=False) +@attr.s(slots=True, frozen=True) +class _annotatedfile(object): + # list indexed by lineno - 1 + fctxs = attr.ib() + linenos = attr.ib() + skips = attr.ib() + # full file content + text = attr.ib() + def _countlines(text): if text.endswith("\n"): return text.count("\n") @@ -385,7 +394,7 @@ def _annotatepair(parents, childfctx, ch See test-annotate.py for unit tests. ''' - pblocks = [(parent, mdiff.allblocks(parent[1], child[1], opts=diffopts)) + pblocks = [(parent, mdiff.allblocks(parent.text, child.text, opts=diffopts)) for parent in parents] if skipchild: @@ -398,7 +407,9 @@ def _annotatepair(parents, childfctx, ch # Changed blocks ('!') or blocks made only of blank lines ('~') # belong to the child. if t == '=': - child[0][b1:b2] = parent[0][a1:a2] + child.fctxs[b1:b2] = parent.fctxs[a1:a2] + child.linenos[b1:b2] = parent.linenos[a1:a2] + child.skips[b1:b2] = parent.skips[a1:a2] if skipchild: # Now try and match up anything that couldn't be matched, @@ -419,9 +430,11 @@ def _annotatepair(parents, childfctx, ch for (a1, a2, b1, b2), _t in blocks: if a2 - a1 >= b2 - b1: for bk in xrange(b1, b2): - if child[0][bk].fctx == childfctx: + if child.fctxs[bk] == childfctx: ak = min(a1 + (bk - b1), a2 - 1) - child[0][bk] = attr.evolve(parent[0][ak], skip=True) + child.fctxs[bk] = parent.fctxs[ak] + child.linenos[bk] = parent.linenos[ak] + child.skips[bk] = True else: remaining[idx][1].append((a1, a2, b1, b2)) @@ -430,9 +443,11 @@ def _annotatepair(parents, childfctx, ch for parent, blocks in remaining: for a1, a2, b1, b2 in blocks: for bk in xrange(b1, b2): - if child[0][bk].fctx == childfctx: + if child.fctxs[bk] == childfctx: ak = min(a1 + (bk - b1), a2 - 1) - child[0][bk] = attr.evolve(parent[0][ak], skip=True) + child.fctxs[bk] = parent.fctxs[ak] + child.linenos[bk] = parent.linenos[ak] + child.skips[bk] = True return child def annotate(base, parents, linenumber=False, skiprevs=None, diffopts=None): @@ -443,11 +458,13 @@ def annotate(base, parents, linenumber=F if linenumber: def decorate(text, fctx): - return ([annotateline(fctx=fctx, lineno=i) - for i in xrange(1, _countlines(text) + 1)], text) + n = _countlines(text) + linenos = pycompat.rangelist(1, n + 1) + return _annotatedfile([fctx] * n, linenos, [False] * n, text) else: def decorate(text, fctx): - return ([annotateline(fctx=fctx)] * _countlines(text), text) + n = _countlines(text) + return _annotatedfile([fctx] * n, [False] * n, [False] * n, text) # This algorithm would prefer to be recursive, but Python is a # bit recursion-hostile. Instead we do an iterative @@ -501,8 +518,10 @@ def annotate(base, parents, linenumber=F hist[f] = curr del pcache[f] - lineattrs, text = hist[base] - return pycompat.ziplist(lineattrs, mdiff.splitnewlines(text)) + a = hist[base] + return [(annotateline(fctx, lineno, skip), line) + for fctx, lineno, skip, line + in zip(a.fctxs, a.linenos, a.skips, mdiff.splitnewlines(a.text))] def toposort(revs, parentsfunc, firstbranch=()): """Yield revisions from heads to roots one (topo) branch at a time. diff --git a/mercurial/pycompat.py b/mercurial/pycompat.py --- a/mercurial/pycompat.py +++ b/mercurial/pycompat.py @@ -71,6 +71,9 @@ if ispy3: def maplist(*args): return list(map(*args)) + def rangelist(*args): + return list(range(*args)) + def ziplist(*args): return list(zip(*args)) @@ -348,6 +351,7 @@ else: bytesio = cStringIO.StringIO stringio = bytesio maplist = map + rangelist = range ziplist = zip rawinput = raw_input getargspec = inspect.getargspec diff --git a/tests/test-annotate.py b/tests/test-annotate.py --- a/tests/test-annotate.py +++ b/tests/test-annotate.py @@ -5,12 +5,18 @@ import unittest from mercurial import ( mdiff, + pycompat, ) from mercurial.dagop import ( annotateline, + _annotatedfile, _annotatepair, ) +def tr(a): + return [annotateline(fctx, lineno, skip) + for fctx, lineno, skip in zip(a.fctxs, a.linenos, a.skips)] + class AnnotateTests(unittest.TestCase): """Unit tests for annotate code.""" @@ -26,16 +32,16 @@ class AnnotateTests(unittest.TestCase): diffopts = mdiff.diffopts() def decorate(text, fctx): - return ([annotateline(fctx=fctx, lineno=i) - for i in range(1, text.count(b'\n') + 1)], - text) + n = text.count(b'\n') + linenos = pycompat.rangelist(1, n + 1) + return _annotatedfile([fctx] * n, linenos, [False] * n, text) # Basic usage oldann = decorate(olddata, oldfctx) p1ann = decorate(p1data, p1fctx) p1ann = _annotatepair([oldann], p1fctx, p1ann, False, diffopts) - self.assertEqual(p1ann[0], [ + self.assertEqual(tr(p1ann), [ annotateline(b'old', 1), annotateline(b'old', 2), annotateline(b'p1', 3), @@ -43,7 +49,7 @@ class AnnotateTests(unittest.TestCase): p2ann = decorate(p2data, p2fctx) p2ann = _annotatepair([oldann], p2fctx, p2ann, False, diffopts) - self.assertEqual(p2ann[0], [ + self.assertEqual(tr(p2ann), [ annotateline(b'old', 1), annotateline(b'p2', 2), annotateline(b'p2', 3), @@ -54,7 +60,7 @@ class AnnotateTests(unittest.TestCase): childann = decorate(childdata, childfctx) childann = _annotatepair([p1ann, p2ann], childfctx, childann, False, diffopts) - self.assertEqual(childann[0], [ + self.assertEqual(tr(childann), [ annotateline(b'old', 1), annotateline(b'c', 2), annotateline(b'p2', 2), @@ -65,7 +71,7 @@ class AnnotateTests(unittest.TestCase): childann = decorate(childdata, childfctx) childann = _annotatepair([p2ann, p1ann], childfctx, childann, False, diffopts) - self.assertEqual(childann[0], [ + self.assertEqual(tr(childann), [ annotateline(b'old', 1), annotateline(b'c', 2), annotateline(b'p1', 3), @@ -78,7 +84,7 @@ class AnnotateTests(unittest.TestCase): childann = decorate(childdata, childfctx) childann = _annotatepair([p1ann, p2ann], childfctx, childann, True, diffopts) - self.assertEqual(childann[0], [ + self.assertEqual(tr(childann), [ annotateline(b'old', 1), annotateline(b'old', 2, True), # note that this line was carried over from earlier so it is *not* @@ -91,7 +97,7 @@ class AnnotateTests(unittest.TestCase): childann = decorate(childdata, childfctx) childann = _annotatepair([p2ann, p1ann], childfctx, childann, True, diffopts) - self.assertEqual(childann[0], [ + self.assertEqual(tr(childann), [ annotateline(b'old', 1), annotateline(b'old', 2, True), annotateline(b'p1', 3), _______________________________________________ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel