marmoute created this revision. Herald added subscribers: mercurial-devel, mjpieters. Herald added a reviewer: hg-reviewers.
REVISION SUMMARY This python code aims to be as "simple" as possible. It is a reference implementation of the data we are going to serialize on disk (and possibly, later a way for pure python install to make sure the on disk data are up to date). It is not optimized for performance and rebuild the full data structure from the index every time. This is a stepping stone toward a persistent nodemap on disk. REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D7834 AFFECTED FILES mercurial/debugcommands.py mercurial/revlogutils/nodemap.py tests/test-completion.t tests/test-help.t tests/test-persistent-nodemap.t CHANGE DETAILS diff --git a/tests/test-persistent-nodemap.t b/tests/test-persistent-nodemap.t new file mode 100644 --- /dev/null +++ b/tests/test-persistent-nodemap.t @@ -0,0 +1,26 @@ +=================================== +Test the persistent on-disk nodemap +=================================== + + + $ hg init test-repo + $ cd test-repo + $ hg debugbuilddag .+5000 + $ hg debugnodemap --dump | f --sha256 --bytes=256 --hexdump --size + size=245760, sha256=5dbe62ab98a26668b544063d4d674ac4452ba903ee8895c52fd21d9bbd771e09 + 0000: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................| + 0010: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................| + 0020: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................| + 0030: ff ff ff ff ff ff fa c6 ff ff ff ff ff ff ff ff |................| + 0040: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................| + 0050: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................| + 0060: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ed b7 |................| + 0070: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................| + 0080: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ee 38 |...............8| + 0090: 00 00 00 00 00 00 00 00 ff ff ff ff ff ff ff ff |................| + 00a0: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................| + 00b0: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................| + 00c0: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................| + 00d0: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................| + 00e0: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................| + 00f0: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................| diff --git a/tests/test-help.t b/tests/test-help.t --- a/tests/test-help.t +++ b/tests/test-help.t @@ -1017,6 +1017,7 @@ print merge state debugnamecomplete complete "names" - tags, open branch names, bookmark names + debugnodemap write and inspect on disk nodemap debugobsolete create arbitrary obsolete marker debugoptADV (no help text available) diff --git a/tests/test-completion.t b/tests/test-completion.t --- a/tests/test-completion.t +++ b/tests/test-completion.t @@ -107,6 +107,7 @@ debugmanifestfulltextcache debugmergestate debugnamecomplete + debugnodemap debugobsolete debugp1copies debugp2copies @@ -289,6 +290,7 @@ debugmanifestfulltextcache: clear, add debugmergestate: debugnamecomplete: + debugnodemap: dump debugobsolete: flags, record-parents, rev, exclusive, index, delete, date, user, template debugp1copies: rev debugp2copies: rev diff --git a/mercurial/revlogutils/nodemap.py b/mercurial/revlogutils/nodemap.py --- a/mercurial/revlogutils/nodemap.py +++ b/mercurial/revlogutils/nodemap.py @@ -7,9 +7,156 @@ # GNU General Public License version 2 or any later version. from __future__ import absolute_import -from .. import error + +import struct + +from .. import ( + error, + node as nodemod, +) class NodeMap(dict): def __missing__(self, x): raise error.RevlogError(b'unknown node: %s' % x) + + +### Nodemap Trie +# +# This is a simple reference implementation to compute and serialise a nodemap +# trie. This reference implementation is write only. The python version of this +# is not expected to be actually used, since it wont provide performance +# improvement over existing non-persistent C implementation. +# +# The nodemap is serialized as Trie using 4bits-address/16-entries block. each +# revision can be adressed using its node shortest prefix. +# +# The trie is stored as a sequence of block. Each block contains 16 entries +# (signed 64bit integer, big endian). Each entry can be one of the following: +# +# * value >= 0 -> index of sub-block +# * value == -1 -> no value +# * value < -1 -> a revision value: rev = -(value+10) +# +# The implementation focus on simplicity, not on performance. A Rust +# implementation should provide a efficient version of the same serialization. +# This reference python implementation is never meant to be extensively use in +# production. + + +def persistent_data(index): + """return the serialised data of a nodemap for a given index + """ + trie = _build_trie(index) + return _dump_trie(trie) + + +S_BLOCK = struct.Struct(">" + ("q" * 16)) + +NO_ENTRY = -1 +# rev-0 need to be -2 because 0 is used by block, -1 is a special value. +REV_OFFSET = -2 + + +def _transform_rev(rev): + """Return the number used to represent the rev in the tree. + + (or retrieve a rev number from such representation) + + Note that this is an involution. + """ + return -(rev + REV_OFFSET) + + +def _to_int(hex_digit): + """turn an hexadecimal digit into a proper integer""" + return int(hex_digit, 16) + + +def _build_trie(index): + """build a nodemap trie + + The nodemap store revision number for each unique prefix. + + Each block is a dictionnary with key in `[0, 15]`. Value are either + another block or a revision number. + """ + root = {} + for rev in range(len(index)): + hex = nodemod.hex(index[rev][7]) + _insert_into_block(index, 0, root, rev, hex) + return root + + +def _insert_into_block(index, level, block, current_rev, current_hex): + """insert a new revision in a block + + index: the index we are adding revision for + level: the depth of the current block in the trie + block: the block currently being considered + current_rev: the revision number we are adding + current_hex: the hexadecimal representation of the of that revision + """ + entry = block.get(_to_int(current_hex[level])) + if entry is None: + # no entry, simply store the revision number + block[_to_int(current_hex[level])] = current_rev + elif isinstance(entry, dict): + # need to recurse to an underlying block + _insert_into_block(index, level + 1, entry, current_rev, current_hex) + else: + # collision with a previously unique prefix, inserting new + # vertices to fit both entry. + other_hex = nodemod.hex(index[entry][7]) + other_rev = entry + while current_hex[level] == other_hex[level]: + new = {} + block[_to_int(current_hex[level])] = new + block = new + level += 1 + block[_to_int(current_hex[level])] = current_rev + block[_to_int(other_hex[level])] = other_rev + + +def _dump_trie(root): + """serialise a nodemap trie + + See `_build_trie` for nodemap trie structure""" + block_map = {} + chunks = [] + for tn in _walk_trie(root): + block_map[id(tn)] = len(chunks) + chunks.append(_dump_block(tn, block_map)) + return ''.join(chunks) + + +def _walk_trie(block): + """yield all the block in a trie + + Children blocks are always yield before their parent block. + """ + for (_, item) in sorted(block.items()): + if isinstance(item, dict): + for sub_block in _walk_trie(item): + yield sub_block + yield block + + +def _dump_block(block_node, block_map): + """serialise a single block + + Children block are assumed to be already serialized and present in + block_map. + """ + data = tuple(_to_value(block_node.get(i), block_map) for i in range(16)) + return S_BLOCK.pack(*data) + + +def _to_value(item, block_map): + """serialize any value as an integer""" + if item is None: + return NO_ENTRY + elif isinstance(item, dict): + return block_map[id(item)] + else: + return _transform_rev(item) diff --git a/mercurial/debugcommands.py b/mercurial/debugcommands.py --- a/mercurial/debugcommands.py +++ b/mercurial/debugcommands.py @@ -93,7 +93,10 @@ stringutil, ) -from .revlogutils import deltas as deltautil +from .revlogutils import ( + deltas as deltautil, + nodemap, +) release = lockmod.release @@ -2075,6 +2078,20 @@ @command( + b'debugnodemap', + [('', b'dump', False, _(b'write a (binary) serialised nodemap on stdin'))], +) +def debugnodemap(ui, repo, **args): + """write and inspect on disk nodemap + """ + if args['dump']: + unfi = repo.unfiltered() + cl = unfi.changelog + data = nodemap.persistent_data(cl.index) + ui.write(data) + + +@command( b'debugobsolete', [ (b'', b'flags', 0, _(b'markers flag')), To: marmoute, #hg-reviewers Cc: mjpieters, mercurial-devel _______________________________________________ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel