Re: [PATCH v2] lazymanifest: write a more efficient, pypy friendly version of lazymanifest
cool, thanks guys! I've sent a new version that should pass all the tests. On Sun, Sep 25, 2016 at 1:03 PM, Jun Wu wrote: > Excerpts from Maciej Fijalkowski's message of 2016-09-25 09:13:29 +0200: >> a proper debugger which is surprisingly hard to use in the case of >> mercurial tests > > If you mean ipdb cannot be used together with run-tests.py, it could be > solved by changing its I/O to /dev/tty, like: > > from IPython.core.debugger import Pdb > originit = Pdb.__init__ > def pdbinit(*args, **kwargs): > fin = open('/dev/tty', 'r') > fout = open('/dev/tty', 'w') > originit(*args, stdin=fin, stdout=fout, **kwargs) > Pdb.__init__ = pdbinit > import ipdb; ipdb.set_trace() ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Re: [PATCH v2] lazymanifest: write a more efficient, pypy friendly version of lazymanifest
./run-tests.py -l -d test.t If you have a breakpoint set, it should just work (unless server processes detached from stdin are involved). > On Sep 25, 2016, at 00:13, Maciej Fijalkowski wrote: > > I must say (and you are going to say that those are idle complaints) > that asserts and prints are far inferior than a proper debugger which > is surprisingly hard to use in the case of mercurial tests > >> On Tue, Sep 20, 2016 at 4:39 PM, Augie Fackler wrote: >>> On Sat, Sep 17, 2016 at 08:23:36PM +0200, Maciej Fijalkowski wrote: >>> This should fix the issues presented. >>> >>> There is one problem which is that the hash in test-rebase-detach >>> changes. The way I see it is just an RNG phase-shift, but it might be >>> a real bug. Some actual input will be appreciated. >> >> It's not possibly RNG related - there's no RNG that goes into the sha1 >> of a node. I've added --debug to the failure in test-rebase-detach and >> compared them between cpython and pypy: >> >> cpython: >> @@ -366,11 +366,24 @@ >> >> >> $ hg log --rev tip --debug >> - changeset: 8:9472f4b1d736 >> + changeset: 8:9472f4b1d7366902d0098aa1401e3f17cc56 >> tag: tip >> + phase: draft >> + parent: 7:02de42196ebee42ef284b6780a87cdc96e8eaab6 >> + parent: -1: >> + manifest:8:8dbdcf066a97239fde2d0dba19fcb1e699fa66d2 >> user:test >> date:Thu Jan 01 00:00:00 1970 + >> - summary: Collapsed revision >> + files: F >> + files+: E >> + extra: branch=default >> + extra: rebase_source=9427d4d5af81c393e6b630129712cde8cdad5605 >> + description: >> + Collapsed revision >> + * I >> + * Merge >> + * J >> + >> >> >> pypy: >> @@ -366,11 +366,24 @@ >> >> >> $ hg log --rev tip --debug >> - changeset: 8:9472f4b1d736 >> + changeset: 8:122ceff3b303b13e5ca323742a8ebe589b9f8066 >> tag: tip >> + phase: draft >> + parent: 7:02de42196ebee42ef284b6780a87cdc96e8eaab6 >> + parent: -1: >> + manifest:8:09f1dc369d2667dcaeb9cf828b0fcb76bda29f33 >> user:test >> date:Thu Jan 01 00:00:00 1970 + >> - summary: Collapsed revision >> + files: F >> + files+: E >> + extra: branch=default >> + extra: rebase_source=9427d4d5af81c393e6b630129712cde8cdad5605 >> + description: >> + Collapsed revision >> + * I >> + * Merge >> + * J >> + >> >> Which means the manifest is hashing out differently. That suggests >> there's a subtle bug in how the manifest is hashing out in the pypy >> version. Adding a `hg debugdata -m 8` (which prints raw manifest >> data), and this is what I get on cpython: >> >> $ hg debugdata -m 8 >> + A\x0045f17b21388f07b8939b22052e5f3776e5246388 (esc) >> + E\x00c3b9643004a8d1c7f39b0025cefab53dc8f7dc12 (esc) >> + F\x00293a00dc38ac20f042c67ba8534eedc1b6a7ae15 (esc) >> + H\x008500189e74a9e0475e822093bc7db0d631aeb0b4 (esc) >> >> on pypy: >> $ hg debugdata -m 8 >> + A\x0045f17b21388f07b8939b22052e5f3776e5246388 (esc) >> + F\x0022bfcfd62a21a3287edbd4d656218d0f525ed76a (esc) >> + E\x00c3b9643004a8d1c7f39b0025cefab53dc8f7dc12 (esc) >> + F\x00293a00dc38ac20f042c67ba8534eedc1b6a7ae15 (esc) >> + H\x008500189e74a9e0475e822093bc7db0d631aeb0b4 (esc) >> >> >> So you've got a subtle bug here where the compaction of the >> lazymanifest isn't working right - probably around merging manifests >> somehow. Does this give you enough to get started? >> >> (I'd probably start debugging this by littering _lazymanifest._compact >> and _lazymanifest.text with asserts until I managed to trip over the >> un-sorted lines, and then backtrack from there to figure out how they >> managed to survive.) >> >>> >>> >>> On Sat, Sep 17, 2016 at 8:22 PM, Maciej Fijalkowski wrote: # HG changeset patch # User Maciej Fijalkowski # Date 1473680234 -7200 # Mon Sep 12 13:37:14 2016 +0200 # Node ID 7551f1e60b2155462d89a9571eec065e9f67debc # Parent df05c43bd1e64f1620d0b2e502f4603c1e5a8341 lazymanifest: write a more efficient, pypy friendly version of lazymanifest diff --git a/mercurial/manifest.py b/mercurial/manifest.py --- a/mercurial/manifest.py +++ b/mercurial/manifest.py @@ -104,69 +104,297 @@ _checkforbidden(files) return ''.join(lines) -class _lazymanifest(dict): -"""This is the pure implementation of lazymanifest. - -It has not been optimized *at all* and is not lazy. -""" - -def __init__(self, data): -dict.__init__(self) -for f, n, fl in _parse(data): -self[f] = n, fl - -def __setitem__(self, k, v): -node, flag = v -assert node is not None -if len(node) > 21: -node = node[:21] # match c implementation behavior -dict.__set
Re: [PATCH v2] lazymanifest: write a more efficient, pypy friendly version of lazymanifest
Excerpts from Maciej Fijalkowski's message of 2016-09-25 09:13:29 +0200: > a proper debugger which is surprisingly hard to use in the case of > mercurial tests If you mean ipdb cannot be used together with run-tests.py, it could be solved by changing its I/O to /dev/tty, like: from IPython.core.debugger import Pdb originit = Pdb.__init__ def pdbinit(*args, **kwargs): fin = open('/dev/tty', 'r') fout = open('/dev/tty', 'w') originit(*args, stdin=fin, stdout=fout, **kwargs) Pdb.__init__ = pdbinit import ipdb; ipdb.set_trace() ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Re: [PATCH v2] lazymanifest: write a more efficient, pypy friendly version of lazymanifest
I must say (and you are going to say that those are idle complaints) that asserts and prints are far inferior than a proper debugger which is surprisingly hard to use in the case of mercurial tests On Tue, Sep 20, 2016 at 4:39 PM, Augie Fackler wrote: > On Sat, Sep 17, 2016 at 08:23:36PM +0200, Maciej Fijalkowski wrote: >> This should fix the issues presented. >> >> There is one problem which is that the hash in test-rebase-detach >> changes. The way I see it is just an RNG phase-shift, but it might be >> a real bug. Some actual input will be appreciated. > > It's not possibly RNG related - there's no RNG that goes into the sha1 > of a node. I've added --debug to the failure in test-rebase-detach and > compared them between cpython and pypy: > > cpython: > @@ -366,11 +366,24 @@ > > >$ hg log --rev tip --debug > - changeset: 8:9472f4b1d736 > + changeset: 8:9472f4b1d7366902d0098aa1401e3f17cc56 >tag: tip > + phase: draft > + parent: 7:02de42196ebee42ef284b6780a87cdc96e8eaab6 > + parent: -1: > + manifest:8:8dbdcf066a97239fde2d0dba19fcb1e699fa66d2 >user:test >date:Thu Jan 01 00:00:00 1970 + > - summary: Collapsed revision > + files: F > + files+: E > + extra: branch=default > + extra: rebase_source=9427d4d5af81c393e6b630129712cde8cdad5605 > + description: > + Collapsed revision > + * I > + * Merge > + * J > + > > > pypy: > @@ -366,11 +366,24 @@ > > >$ hg log --rev tip --debug > - changeset: 8:9472f4b1d736 > + changeset: 8:122ceff3b303b13e5ca323742a8ebe589b9f8066 >tag: tip > + phase: draft > + parent: 7:02de42196ebee42ef284b6780a87cdc96e8eaab6 > + parent: -1: > + manifest:8:09f1dc369d2667dcaeb9cf828b0fcb76bda29f33 >user:test >date:Thu Jan 01 00:00:00 1970 + > - summary: Collapsed revision > + files: F > + files+: E > + extra: branch=default > + extra: rebase_source=9427d4d5af81c393e6b630129712cde8cdad5605 > + description: > + Collapsed revision > + * I > + * Merge > + * J > + > > Which means the manifest is hashing out differently. That suggests > there's a subtle bug in how the manifest is hashing out in the pypy > version. Adding a `hg debugdata -m 8` (which prints raw manifest > data), and this is what I get on cpython: > >$ hg debugdata -m 8 > + A\x0045f17b21388f07b8939b22052e5f3776e5246388 (esc) > + E\x00c3b9643004a8d1c7f39b0025cefab53dc8f7dc12 (esc) > + F\x00293a00dc38ac20f042c67ba8534eedc1b6a7ae15 (esc) > + H\x008500189e74a9e0475e822093bc7db0d631aeb0b4 (esc) > > on pypy: >$ hg debugdata -m 8 > + A\x0045f17b21388f07b8939b22052e5f3776e5246388 (esc) > + F\x0022bfcfd62a21a3287edbd4d656218d0f525ed76a (esc) > + E\x00c3b9643004a8d1c7f39b0025cefab53dc8f7dc12 (esc) > + F\x00293a00dc38ac20f042c67ba8534eedc1b6a7ae15 (esc) > + H\x008500189e74a9e0475e822093bc7db0d631aeb0b4 (esc) > > > So you've got a subtle bug here where the compaction of the > lazymanifest isn't working right - probably around merging manifests > somehow. Does this give you enough to get started? > > (I'd probably start debugging this by littering _lazymanifest._compact > and _lazymanifest.text with asserts until I managed to trip over the > un-sorted lines, and then backtrack from there to figure out how they > managed to survive.) > >> >> >> >> On Sat, Sep 17, 2016 at 8:22 PM, Maciej Fijalkowski wrote: >> > # HG changeset patch >> > # User Maciej Fijalkowski >> > # Date 1473680234 -7200 >> > # Mon Sep 12 13:37:14 2016 +0200 >> > # Node ID 7551f1e60b2155462d89a9571eec065e9f67debc >> > # Parent df05c43bd1e64f1620d0b2e502f4603c1e5a8341 >> > lazymanifest: write a more efficient, pypy friendly version of lazymanifest >> > >> > diff --git a/mercurial/manifest.py b/mercurial/manifest.py >> > --- a/mercurial/manifest.py >> > +++ b/mercurial/manifest.py >> > @@ -104,69 +104,297 @@ >> > _checkforbidden(files) >> > return ''.join(lines) >> > >> > -class _lazymanifest(dict): >> > -"""This is the pure implementation of lazymanifest. >> > - >> > -It has not been optimized *at all* and is not lazy. >> > -""" >> > - >> > -def __init__(self, data): >> > -dict.__init__(self) >> > -for f, n, fl in _parse(data): >> > -self[f] = n, fl >> > - >> > -def __setitem__(self, k, v): >> > -node, flag = v >> > -assert node is not None >> > -if len(node) > 21: >> > -node = node[:21] # match c implementation behavior >> > -dict.__setitem__(self, k, (node, flag)) >> > +class lazymanifestiter(object): >> > +def __init__(self, lm): >> > +self.pos = 0 >> > +self.lm = lm >> > >> > def __iter__(self): >> > -return iter(sorted(dict.keys(self))) >> > +return self >> > >> > -def iterkeys(self): >> > -return i
Re: [PATCH v2] lazymanifest: write a more efficient, pypy friendly version of lazymanifest
On Sat, Sep 17, 2016 at 08:23:36PM +0200, Maciej Fijalkowski wrote: > This should fix the issues presented. > > There is one problem which is that the hash in test-rebase-detach > changes. The way I see it is just an RNG phase-shift, but it might be > a real bug. Some actual input will be appreciated. It's not possibly RNG related - there's no RNG that goes into the sha1 of a node. I've added --debug to the failure in test-rebase-detach and compared them between cpython and pypy: cpython: @@ -366,11 +366,24 @@ $ hg log --rev tip --debug - changeset: 8:9472f4b1d736 + changeset: 8:9472f4b1d7366902d0098aa1401e3f17cc56 tag: tip + phase: draft + parent: 7:02de42196ebee42ef284b6780a87cdc96e8eaab6 + parent: -1: + manifest:8:8dbdcf066a97239fde2d0dba19fcb1e699fa66d2 user:test date:Thu Jan 01 00:00:00 1970 + - summary: Collapsed revision + files: F + files+: E + extra: branch=default + extra: rebase_source=9427d4d5af81c393e6b630129712cde8cdad5605 + description: + Collapsed revision + * I + * Merge + * J + pypy: @@ -366,11 +366,24 @@ $ hg log --rev tip --debug - changeset: 8:9472f4b1d736 + changeset: 8:122ceff3b303b13e5ca323742a8ebe589b9f8066 tag: tip + phase: draft + parent: 7:02de42196ebee42ef284b6780a87cdc96e8eaab6 + parent: -1: + manifest:8:09f1dc369d2667dcaeb9cf828b0fcb76bda29f33 user:test date:Thu Jan 01 00:00:00 1970 + - summary: Collapsed revision + files: F + files+: E + extra: branch=default + extra: rebase_source=9427d4d5af81c393e6b630129712cde8cdad5605 + description: + Collapsed revision + * I + * Merge + * J + Which means the manifest is hashing out differently. That suggests there's a subtle bug in how the manifest is hashing out in the pypy version. Adding a `hg debugdata -m 8` (which prints raw manifest data), and this is what I get on cpython: $ hg debugdata -m 8 + A\x0045f17b21388f07b8939b22052e5f3776e5246388 (esc) + E\x00c3b9643004a8d1c7f39b0025cefab53dc8f7dc12 (esc) + F\x00293a00dc38ac20f042c67ba8534eedc1b6a7ae15 (esc) + H\x008500189e74a9e0475e822093bc7db0d631aeb0b4 (esc) on pypy: $ hg debugdata -m 8 + A\x0045f17b21388f07b8939b22052e5f3776e5246388 (esc) + F\x0022bfcfd62a21a3287edbd4d656218d0f525ed76a (esc) + E\x00c3b9643004a8d1c7f39b0025cefab53dc8f7dc12 (esc) + F\x00293a00dc38ac20f042c67ba8534eedc1b6a7ae15 (esc) + H\x008500189e74a9e0475e822093bc7db0d631aeb0b4 (esc) So you've got a subtle bug here where the compaction of the lazymanifest isn't working right - probably around merging manifests somehow. Does this give you enough to get started? (I'd probably start debugging this by littering _lazymanifest._compact and _lazymanifest.text with asserts until I managed to trip over the un-sorted lines, and then backtrack from there to figure out how they managed to survive.) > > > > On Sat, Sep 17, 2016 at 8:22 PM, Maciej Fijalkowski wrote: > > # HG changeset patch > > # User Maciej Fijalkowski > > # Date 1473680234 -7200 > > # Mon Sep 12 13:37:14 2016 +0200 > > # Node ID 7551f1e60b2155462d89a9571eec065e9f67debc > > # Parent df05c43bd1e64f1620d0b2e502f4603c1e5a8341 > > lazymanifest: write a more efficient, pypy friendly version of lazymanifest > > > > diff --git a/mercurial/manifest.py b/mercurial/manifest.py > > --- a/mercurial/manifest.py > > +++ b/mercurial/manifest.py > > @@ -104,69 +104,297 @@ > > _checkforbidden(files) > > return ''.join(lines) > > > > -class _lazymanifest(dict): > > -"""This is the pure implementation of lazymanifest. > > - > > -It has not been optimized *at all* and is not lazy. > > -""" > > - > > -def __init__(self, data): > > -dict.__init__(self) > > -for f, n, fl in _parse(data): > > -self[f] = n, fl > > - > > -def __setitem__(self, k, v): > > -node, flag = v > > -assert node is not None > > -if len(node) > 21: > > -node = node[:21] # match c implementation behavior > > -dict.__setitem__(self, k, (node, flag)) > > +class lazymanifestiter(object): > > +def __init__(self, lm): > > +self.pos = 0 > > +self.lm = lm > > > > def __iter__(self): > > -return iter(sorted(dict.keys(self))) > > +return self > > > > -def iterkeys(self): > > -return iter(sorted(dict.keys(self))) > > +def next(self): > > +try: > > +data, pos = self.lm._get(self.pos) > > +except IndexError: > > +raise StopIteration > > +if pos == -1: > > +self.pos += 1 > > +return data[0] > > +self.pos += 1 > > +zeropos = data.find('\x00', pos) > > +return data[pos:zeropos] > > > > -def iterentries(self): > > -return ((f, e[0], e[1])
Re: [PATCH v2] lazymanifest: write a more efficient, pypy friendly version of lazymanifest
> On Sep 17, 2016, at 14:23, Maciej Fijalkowski wrote: > > This should fix the issues presented. > > There is one problem which is that the hash in test-rebase-detach > changes. The way I see it is just an RNG phase-shift, but it might be > a real bug. Some actual input will be appreciated. I'll get a look at this in the morning America/New_York - my other miscellany occupied me for far too long today and I just don't have brainpower for this tonight. Sorry to keep you waiting. AF > > > > On Sat, Sep 17, 2016 at 8:22 PM, Maciej Fijalkowski wrote: >> # HG changeset patch >> # User Maciej Fijalkowski >> # Date 1473680234 -7200 >> # Mon Sep 12 13:37:14 2016 +0200 >> # Node ID 7551f1e60b2155462d89a9571eec065e9f67debc >> # Parent df05c43bd1e64f1620d0b2e502f4603c1e5a8341 >> lazymanifest: write a more efficient, pypy friendly version of lazymanifest >> >> diff --git a/mercurial/manifest.py b/mercurial/manifest.py >> --- a/mercurial/manifest.py >> +++ b/mercurial/manifest.py >> @@ -104,69 +104,297 @@ >> _checkforbidden(files) >> return ''.join(lines) >> >> -class _lazymanifest(dict): >> -"""This is the pure implementation of lazymanifest. >> - >> -It has not been optimized *at all* and is not lazy. >> -""" >> - >> -def __init__(self, data): >> -dict.__init__(self) >> -for f, n, fl in _parse(data): >> -self[f] = n, fl >> - >> -def __setitem__(self, k, v): >> -node, flag = v >> -assert node is not None >> -if len(node) > 21: >> -node = node[:21] # match c implementation behavior >> -dict.__setitem__(self, k, (node, flag)) >> +class lazymanifestiter(object): >> +def __init__(self, lm): >> +self.pos = 0 >> +self.lm = lm >> >> def __iter__(self): >> -return iter(sorted(dict.keys(self))) >> +return self >> >> -def iterkeys(self): >> -return iter(sorted(dict.keys(self))) >> +def next(self): >> +try: >> +data, pos = self.lm._get(self.pos) >> +except IndexError: >> +raise StopIteration >> +if pos == -1: >> +self.pos += 1 >> +return data[0] >> +self.pos += 1 >> +zeropos = data.find('\x00', pos) >> +return data[pos:zeropos] >> >> -def iterentries(self): >> -return ((f, e[0], e[1]) for f, e in sorted(self.iteritems())) >> +class lazymanifestiterentries(object): >> +def __init__(self, lm): >> +self.lm = lm >> +self.pos = 0 >> + >> +def __iter__(self): >> +return self >> + >> +def next(self): >> +try: >> +data, pos = self.lm._get(self.pos) >> +except IndexError: >> +raise StopIteration >> +if pos == -1: >> +self.pos += 1 >> +return data >> +zeropos = data.find('\x00', pos) >> +hashval = unhexlify(data, self.lm.extrainfo[self.pos], >> +zeropos + 1, 40) >> +flags = self.lm._getflags(data, self.pos, zeropos) >> +self.pos += 1 >> +return (data[pos:zeropos], hashval, flags) >> + >> +def unhexlify(data, extra, pos, length): >> +s = data[pos:pos + length].decode('hex') >> +if extra: >> +s += chr(extra & 0xff) >> +return s >> + >> +def _cmp(a, b): >> +return (a > b) - (a < b) >> + >> +class _lazymanifest(object): >> +def __init__(self, data, positions=None, extrainfo=None, >> extradata=None): >> +if positions is None: >> +self.positions = self.findlines(data) >> +self.extrainfo = [0] * len(self.positions) >> +self.data = data >> +self.extradata = [] >> +else: >> +self.positions = positions[:] >> +self.extrainfo = extrainfo[:] >> +self.extradata = extradata[:] >> +self.data = data >> + >> +def findlines(self, data): >> +if not data: >> +return [] >> +pos = data.find("\n") >> +positions = [0] >> +while pos < len(data) - 1 and pos != -1: >> +positions.append(pos + 1) >> +pos = data.find("\n", pos + 1) >> +return positions >> + >> +def _get(self, index): >> +# get the position encoded in pos: >> +# positive number is an index in 'data' >> +# negative number is in extrapieces >> +pos = self.positions[index] >> +if pos >= 0: >> +return self.data, pos >> +return self.extradata[-pos - 1], -1 >> + >> +def _getkey(self, pos): >> +if pos >= 0: >> +return self.data[pos:self.data.find('\x00', pos + 1)] >> +return self.extradata[-pos - 1][0] >> + >> +def bsearch(self, key): >> +first = 0 >> +last = len(self.positions) - 1 >> + >> +while first <= last: >> +midpoint = (first + last)//2 >> +nextpos = self.positions[midpoint] >> +
Re: [PATCH v2] lazymanifest: write a more efficient, pypy friendly version of lazymanifest
This should fix the issues presented. There is one problem which is that the hash in test-rebase-detach changes. The way I see it is just an RNG phase-shift, but it might be a real bug. Some actual input will be appreciated. On Sat, Sep 17, 2016 at 8:22 PM, Maciej Fijalkowski wrote: > # HG changeset patch > # User Maciej Fijalkowski > # Date 1473680234 -7200 > # Mon Sep 12 13:37:14 2016 +0200 > # Node ID 7551f1e60b2155462d89a9571eec065e9f67debc > # Parent df05c43bd1e64f1620d0b2e502f4603c1e5a8341 > lazymanifest: write a more efficient, pypy friendly version of lazymanifest > > diff --git a/mercurial/manifest.py b/mercurial/manifest.py > --- a/mercurial/manifest.py > +++ b/mercurial/manifest.py > @@ -104,69 +104,297 @@ > _checkforbidden(files) > return ''.join(lines) > > -class _lazymanifest(dict): > -"""This is the pure implementation of lazymanifest. > - > -It has not been optimized *at all* and is not lazy. > -""" > - > -def __init__(self, data): > -dict.__init__(self) > -for f, n, fl in _parse(data): > -self[f] = n, fl > - > -def __setitem__(self, k, v): > -node, flag = v > -assert node is not None > -if len(node) > 21: > -node = node[:21] # match c implementation behavior > -dict.__setitem__(self, k, (node, flag)) > +class lazymanifestiter(object): > +def __init__(self, lm): > +self.pos = 0 > +self.lm = lm > > def __iter__(self): > -return iter(sorted(dict.keys(self))) > +return self > > -def iterkeys(self): > -return iter(sorted(dict.keys(self))) > +def next(self): > +try: > +data, pos = self.lm._get(self.pos) > +except IndexError: > +raise StopIteration > +if pos == -1: > +self.pos += 1 > +return data[0] > +self.pos += 1 > +zeropos = data.find('\x00', pos) > +return data[pos:zeropos] > > -def iterentries(self): > -return ((f, e[0], e[1]) for f, e in sorted(self.iteritems())) > +class lazymanifestiterentries(object): > +def __init__(self, lm): > +self.lm = lm > +self.pos = 0 > + > +def __iter__(self): > +return self > + > +def next(self): > +try: > +data, pos = self.lm._get(self.pos) > +except IndexError: > +raise StopIteration > +if pos == -1: > +self.pos += 1 > +return data > +zeropos = data.find('\x00', pos) > +hashval = unhexlify(data, self.lm.extrainfo[self.pos], > +zeropos + 1, 40) > +flags = self.lm._getflags(data, self.pos, zeropos) > +self.pos += 1 > +return (data[pos:zeropos], hashval, flags) > + > +def unhexlify(data, extra, pos, length): > +s = data[pos:pos + length].decode('hex') > +if extra: > +s += chr(extra & 0xff) > +return s > + > +def _cmp(a, b): > +return (a > b) - (a < b) > + > +class _lazymanifest(object): > +def __init__(self, data, positions=None, extrainfo=None, extradata=None): > +if positions is None: > +self.positions = self.findlines(data) > +self.extrainfo = [0] * len(self.positions) > +self.data = data > +self.extradata = [] > +else: > +self.positions = positions[:] > +self.extrainfo = extrainfo[:] > +self.extradata = extradata[:] > +self.data = data > + > +def findlines(self, data): > +if not data: > +return [] > +pos = data.find("\n") > +positions = [0] > +while pos < len(data) - 1 and pos != -1: > +positions.append(pos + 1) > +pos = data.find("\n", pos + 1) > +return positions > + > +def _get(self, index): > +# get the position encoded in pos: > +# positive number is an index in 'data' > +# negative number is in extrapieces > +pos = self.positions[index] > +if pos >= 0: > +return self.data, pos > +return self.extradata[-pos - 1], -1 > + > +def _getkey(self, pos): > +if pos >= 0: > +return self.data[pos:self.data.find('\x00', pos + 1)] > +return self.extradata[-pos - 1][0] > + > +def bsearch(self, key): > +first = 0 > +last = len(self.positions) - 1 > + > +while first <= last: > +midpoint = (first + last)//2 > +nextpos = self.positions[midpoint] > +candidate = self._getkey(nextpos) > +r = _cmp(key, candidate) > +if r == 0: > +return midpoint > +else: > +if r < 0: > +last = midpoint - 1 > +else: > +first = midpoint + 1 > +return -1 > + > +def bsearch2(self, key): > +# same as the above, but will always retu
[PATCH v2] lazymanifest: write a more efficient, pypy friendly version of lazymanifest
# HG changeset patch # User Maciej Fijalkowski # Date 1473680234 -7200 # Mon Sep 12 13:37:14 2016 +0200 # Node ID 7551f1e60b2155462d89a9571eec065e9f67debc # Parent df05c43bd1e64f1620d0b2e502f4603c1e5a8341 lazymanifest: write a more efficient, pypy friendly version of lazymanifest diff --git a/mercurial/manifest.py b/mercurial/manifest.py --- a/mercurial/manifest.py +++ b/mercurial/manifest.py @@ -104,69 +104,297 @@ _checkforbidden(files) return ''.join(lines) -class _lazymanifest(dict): -"""This is the pure implementation of lazymanifest. - -It has not been optimized *at all* and is not lazy. -""" - -def __init__(self, data): -dict.__init__(self) -for f, n, fl in _parse(data): -self[f] = n, fl - -def __setitem__(self, k, v): -node, flag = v -assert node is not None -if len(node) > 21: -node = node[:21] # match c implementation behavior -dict.__setitem__(self, k, (node, flag)) +class lazymanifestiter(object): +def __init__(self, lm): +self.pos = 0 +self.lm = lm def __iter__(self): -return iter(sorted(dict.keys(self))) +return self -def iterkeys(self): -return iter(sorted(dict.keys(self))) +def next(self): +try: +data, pos = self.lm._get(self.pos) +except IndexError: +raise StopIteration +if pos == -1: +self.pos += 1 +return data[0] +self.pos += 1 +zeropos = data.find('\x00', pos) +return data[pos:zeropos] -def iterentries(self): -return ((f, e[0], e[1]) for f, e in sorted(self.iteritems())) +class lazymanifestiterentries(object): +def __init__(self, lm): +self.lm = lm +self.pos = 0 + +def __iter__(self): +return self + +def next(self): +try: +data, pos = self.lm._get(self.pos) +except IndexError: +raise StopIteration +if pos == -1: +self.pos += 1 +return data +zeropos = data.find('\x00', pos) +hashval = unhexlify(data, self.lm.extrainfo[self.pos], +zeropos + 1, 40) +flags = self.lm._getflags(data, self.pos, zeropos) +self.pos += 1 +return (data[pos:zeropos], hashval, flags) + +def unhexlify(data, extra, pos, length): +s = data[pos:pos + length].decode('hex') +if extra: +s += chr(extra & 0xff) +return s + +def _cmp(a, b): +return (a > b) - (a < b) + +class _lazymanifest(object): +def __init__(self, data, positions=None, extrainfo=None, extradata=None): +if positions is None: +self.positions = self.findlines(data) +self.extrainfo = [0] * len(self.positions) +self.data = data +self.extradata = [] +else: +self.positions = positions[:] +self.extrainfo = extrainfo[:] +self.extradata = extradata[:] +self.data = data + +def findlines(self, data): +if not data: +return [] +pos = data.find("\n") +positions = [0] +while pos < len(data) - 1 and pos != -1: +positions.append(pos + 1) +pos = data.find("\n", pos + 1) +return positions + +def _get(self, index): +# get the position encoded in pos: +# positive number is an index in 'data' +# negative number is in extrapieces +pos = self.positions[index] +if pos >= 0: +return self.data, pos +return self.extradata[-pos - 1], -1 + +def _getkey(self, pos): +if pos >= 0: +return self.data[pos:self.data.find('\x00', pos + 1)] +return self.extradata[-pos - 1][0] + +def bsearch(self, key): +first = 0 +last = len(self.positions) - 1 + +while first <= last: +midpoint = (first + last)//2 +nextpos = self.positions[midpoint] +candidate = self._getkey(nextpos) +r = _cmp(key, candidate) +if r == 0: +return midpoint +else: +if r < 0: +last = midpoint - 1 +else: +first = midpoint + 1 +return -1 + +def bsearch2(self, key): +# same as the above, but will always return the position +# done for performance reasons +first = 0 +last = len(self.positions) - 1 + +while first <= last: +midpoint = (first + last)//2 +nextpos = self.positions[midpoint] +candidate = self._getkey(nextpos) +r = _cmp(key, candidate) +if r == 0: +return (midpoint, True) +else: +if r < 0: +last = midpoint - 1 +else: +first = midpoint + 1 +return (first, False) +