Closed by commit rHG50ad851efd9b: nodemap: introduce append-only incremental update of the persistent data (authored by marmoute). This revision was automatically updated to reflect the committed changes. This revision was not accepted when it landed; it landed in state "Needs Review".
REPOSITORY rHG Mercurial CHANGES SINCE LAST UPDATE https://phab.mercurial-scm.org/D7886?vs=19898&id=20119 CHANGES SINCE LAST ACTION https://phab.mercurial-scm.org/D7886/new/ REVISION DETAIL https://phab.mercurial-scm.org/D7886 AFFECTED FILES mercurial/pure/parsers.py mercurial/revlogutils/nodemap.py tests/test-persistent-nodemap.t CHANGE DETAILS diff --git a/tests/test-persistent-nodemap.t b/tests/test-persistent-nodemap.t --- a/tests/test-persistent-nodemap.t +++ b/tests/test-persistent-nodemap.t @@ -49,8 +49,19 @@ $ hg ci -m 'foo' $ f --size .hg/store/00changelog.n .hg/store/00changelog.n: size=18 + +(The pure code use the debug code that perform incremental update, the C code reencode from scratch) + +#if pure + $ f --sha256 .hg/store/00changelog-*.nd --size + .hg/store/00changelog-????????????????.nd: size=123072, sha256=136472751566c8198ff09e306a7d2f9bd18bd32298d614752b73da4d6df23340 (glob) + +#else $ f --sha256 .hg/store/00changelog-*.nd --size .hg/store/00changelog-????????????????.nd: size=122880, sha256=bfafebd751c4f6d116a76a37a1dee2a251747affe7efbcc4f4842ccc746d4db9 (glob) + +#endif + $ hg debugnodemap --check revision in index: 5002 revision in nodemap: 5002 diff --git a/mercurial/revlogutils/nodemap.py b/mercurial/revlogutils/nodemap.py --- a/mercurial/revlogutils/nodemap.py +++ b/mercurial/revlogutils/nodemap.py @@ -69,12 +69,41 @@ if revlog.nodemap_file is None: msg = "calling persist nodemap on a revlog without the feature enableb" raise error.ProgrammingError(msg) - if util.safehasattr(revlog.index, "nodemap_data_all"): - data = revlog.index.nodemap_data_all() + + can_incremental = util.safehasattr(revlog.index, "nodemap_data_incremental") + ondisk_docket = revlog._nodemap_docket + + # first attemp an incremental update of the data + if can_incremental and ondisk_docket is not None: + target_docket = revlog._nodemap_docket.copy() + data = revlog.index.nodemap_data_incremental() + datafile = _rawdata_filepath(revlog, target_docket) + # EXP-TODO: if this is a cache, this should use a cache vfs, not a + # store vfs + with revlog.opener(datafile, b'a') as fd: + fd.write(data) else: - data = persistent_data(revlog.index) - target_docket = NodeMapDocket() - datafile = _rawdata_filepath(revlog, target_docket) + # otherwise fallback to a full new export + target_docket = NodeMapDocket() + datafile = _rawdata_filepath(revlog, target_docket) + if util.safehasattr(revlog.index, "nodemap_data_all"): + data = revlog.index.nodemap_data_all() + else: + data = persistent_data(revlog.index) + # EXP-TODO: if this is a cache, this should use a cache vfs, not a + # store vfs + with revlog.opener(datafile, b'w') as fd: + fd.write(data) + # EXP-TODO: if this is a cache, this should use a cache vfs, not a + # store vfs + with revlog.opener(revlog.nodemap_file, b'w', atomictemp=True) as fp: + fp.write(target_docket.serialize()) + revlog._nodemap_docket = target_docket + # EXP-TODO: if the transaction abort, we should remove the new data and + # reinstall the old one. + + # search for old index file in all cases, some older process might have + # left one behind. olds = _other_rawdata_filepath(revlog, target_docket) if olds: realvfs = getattr(revlog, '_realopener', revlog.opener) @@ -85,17 +114,6 @@ callback_id = b"revlog-cleanup-nodemap-%s" % revlog.nodemap_file tr.addpostclose(callback_id, cleanup) - # EXP-TODO: if this is a cache, this should use a cache vfs, not a - # store vfs - with revlog.opener(datafile, b'w') as fd: - fd.write(data) - # EXP-TODO: if this is a cache, this should use a cache vfs, not a - # store vfs - with revlog.opener(revlog.nodemap_file, b'w', atomictemp=True) as fp: - fp.write(target_docket.serialize()) - revlog._nodemap_docket = target_docket - # EXP-TODO: if the transaction abort, we should remove the new data and - # reinstall the old one. ### Nodemap docket file @@ -208,6 +226,13 @@ return _persist_trie(trie) +def update_persistent_data(index, root, max_idx, last_rev): + """return the incremental update for persistent nodemap from a given index + """ + trie = _update_trie(index, root, last_rev) + return _persist_trie(trie, existing_idx=max_idx) + + S_BLOCK = struct.Struct(">" + ("l" * 16)) NO_ENTRY = -1 @@ -260,6 +285,14 @@ return root +def _update_trie(index, root, last_rev): + """consume""" + for rev in range(last_rev + 1, len(index)): + hex = nodemod.hex(index[rev][7]) + _insert_into_block(index, 0, root, rev, hex) + return root + + def _insert_into_block(index, level, block, current_rev, current_hex): """insert a new revision in a block @@ -269,6 +302,8 @@ current_rev: the revision number we are adding current_hex: the hexadecimal representation of the of that revision """ + if block.ondisk_id is not None: + block.ondisk_id = None hex_digit = _to_int(current_hex[level : level + 1]) entry = block.get(hex_digit) if entry is None: @@ -288,15 +323,22 @@ _insert_into_block(index, level + 1, new, current_rev, current_hex) -def _persist_trie(root): +def _persist_trie(root, existing_idx=None): """turn a nodemap trie into persistent binary data See `_build_trie` for nodemap trie structure""" block_map = {} + if existing_idx is not None: + base_idx = existing_idx + 1 + else: + base_idx = 0 chunks = [] for tn in _walk_trie(root): - block_map[id(tn)] = len(chunks) - chunks.append(_persist_block(tn, block_map)) + if tn.ondisk_id is not None: + block_map[id(tn)] = tn.ondisk_id + else: + block_map[id(tn)] = len(chunks) + base_idx + chunks.append(_persist_block(tn, block_map)) return b''.join(chunks) @@ -338,7 +380,7 @@ msg = "nodemap data size is not a multiple of block size (%d): %d" raise error.Abort(msg % (S_BLOCK.size, len(data))) if not data: - return Block() + return Block(), None block_map = {} new_blocks = [] for i in range(0, len(data), S_BLOCK.size): @@ -356,7 +398,7 @@ b[idx] = block_map[v] else: b[idx] = _transform_rev(v) - return block + return block, i // S_BLOCK.size # debug utility @@ -366,7 +408,7 @@ """verify that the provided nodemap data are valid for the given idex""" ret = 0 ui.status((b"revision in index: %d\n") % len(index)) - root = parse_data(data) + root, __ = parse_data(data) all_revs = set(_all_revisions(root)) ui.status((b"revision in nodemap: %d\n") % len(all_revs)) for r in range(len(index)): diff --git a/mercurial/pure/parsers.py b/mercurial/pure/parsers.py --- a/mercurial/pure/parsers.py +++ b/mercurial/pure/parsers.py @@ -156,13 +156,31 @@ index.""" return nodemaputil.persistent_data(self) + def nodemap_data_incremental(self): + """Return bytes containing a incremental update to persistent nodemap + + This containst the data for an append-only update of the data provided + in the last call to `update_nodemap_data`. + """ + if self._nm_root is None: + return None + data = nodemaputil.update_persistent_data( + self, self._nm_root, self._nm_max_idx, self._nm_rev + ) + self._nm_root = self._nm_max_idx = self._nm_rev = None + return data + def update_nodemap_data(self, nm_data): """provide full blokc of persisted binary data for a nodemap The data are expected to come from disk. See `nodemap_data_all` for a produceur of such data.""" if nm_data is not None: - nodemaputil.parse_data(nm_data) + self._nm_root, self._nm_max_idx = nodemaputil.parse_data(nm_data) + if self._nm_root: + self._nm_rev = len(self) - 1 + else: + self._nm_root = self._nm_max_idx = self._nm_rev = None class InlinedIndexObject(BaseIndexObject): To: marmoute, #hg-reviewers Cc: martinvonz, mercurial-devel _______________________________________________ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel