D7886: nodemap: introduce append-only incremental update of the persistent data
Closed by commit rHG50ad851efd9b: nodemap: introduce append-only incremental update of the persistent data (authored by marmoute). This revision was automatically updated to reflect the committed changes. This revision was not accepted when it landed; it landed in state "Needs Review". REPOSITORY rHG Mercurial CHANGES SINCE LAST UPDATE https://phab.mercurial-scm.org/D7886?vs=19898&id=20119 CHANGES SINCE LAST ACTION https://phab.mercurial-scm.org/D7886/new/ REVISION DETAIL https://phab.mercurial-scm.org/D7886 AFFECTED FILES mercurial/pure/parsers.py mercurial/revlogutils/nodemap.py tests/test-persistent-nodemap.t CHANGE DETAILS diff --git a/tests/test-persistent-nodemap.t b/tests/test-persistent-nodemap.t --- a/tests/test-persistent-nodemap.t +++ b/tests/test-persistent-nodemap.t @@ -49,8 +49,19 @@ $ hg ci -m 'foo' $ f --size .hg/store/00changelog.n .hg/store/00changelog.n: size=18 + +(The pure code use the debug code that perform incremental update, the C code reencode from scratch) + +#if pure + $ f --sha256 .hg/store/00changelog-*.nd --size + .hg/store/00changelog-.nd: size=123072, sha256=136472751566c8198ff09e306a7d2f9bd18bd32298d614752b73da4d6df23340 (glob) + +#else $ f --sha256 .hg/store/00changelog-*.nd --size .hg/store/00changelog-.nd: size=122880, sha256=bfafebd751c4f6d116a76a37a1dee2a251747affe7efbcc4f4842ccc746d4db9 (glob) + +#endif + $ hg debugnodemap --check revision in index: 5002 revision in nodemap: 5002 diff --git a/mercurial/revlogutils/nodemap.py b/mercurial/revlogutils/nodemap.py --- a/mercurial/revlogutils/nodemap.py +++ b/mercurial/revlogutils/nodemap.py @@ -69,12 +69,41 @@ if revlog.nodemap_file is None: msg = "calling persist nodemap on a revlog without the feature enableb" raise error.ProgrammingError(msg) -if util.safehasattr(revlog.index, "nodemap_data_all"): -data = revlog.index.nodemap_data_all() + +can_incremental = util.safehasattr(revlog.index, "nodemap_data_incremental") +ondisk_docket = revlog._nodemap_docket + +# first attemp an 
incremental update of the data +if can_incremental and ondisk_docket is not None: +target_docket = revlog._nodemap_docket.copy() +data = revlog.index.nodemap_data_incremental() +datafile = _rawdata_filepath(revlog, target_docket) +# EXP-TODO: if this is a cache, this should use a cache vfs, not a +# store vfs +with revlog.opener(datafile, b'a') as fd: +fd.write(data) else: -data = persistent_data(revlog.index) -target_docket = NodeMapDocket() -datafile = _rawdata_filepath(revlog, target_docket) +# otherwise fallback to a full new export +target_docket = NodeMapDocket() +datafile = _rawdata_filepath(revlog, target_docket) +if util.safehasattr(revlog.index, "nodemap_data_all"): +data = revlog.index.nodemap_data_all() +else: +data = persistent_data(revlog.index) +# EXP-TODO: if this is a cache, this should use a cache vfs, not a +# store vfs +with revlog.opener(datafile, b'w') as fd: +fd.write(data) +# EXP-TODO: if this is a cache, this should use a cache vfs, not a +# store vfs +with revlog.opener(revlog.nodemap_file, b'w', atomictemp=True) as fp: +fp.write(target_docket.serialize()) +revlog._nodemap_docket = target_docket +# EXP-TODO: if the transaction abort, we should remove the new data and +# reinstall the old one. + +# search for old index file in all cases, some older process might have +# left one behind. 
olds = _other_rawdata_filepath(revlog, target_docket) if olds: realvfs = getattr(revlog, '_realopener', revlog.opener) @@ -85,17 +114,6 @@ callback_id = b"revlog-cleanup-nodemap-%s" % revlog.nodemap_file tr.addpostclose(callback_id, cleanup) -# EXP-TODO: if this is a cache, this should use a cache vfs, not a -# store vfs -with revlog.opener(datafile, b'w') as fd: -fd.write(data) -# EXP-TODO: if this is a cache, this should use a cache vfs, not a -# store vfs -with revlog.opener(revlog.nodemap_file, b'w', atomictemp=True) as fp: -fp.write(target_docket.serialize()) -revlog._nodemap_docket = target_docket -# EXP-TODO: if the transaction abort, we should remove the new data and -# reinstall the old one. ### Nodemap docket file @@ -208,6 +226,13 @@ return _persist_trie(trie) +def update_persistent_data(index, root, max_idx, last_rev): +"""return the incremental update for persistent nodemap from a given index +""" +trie = _update_trie(index, root, last_rev) +return _persist_trie(trie, existing_idx=max_idx) + + S_BLOCK = struct.Struct(">" + ("l" * 16)) NO_ENTRY = -1 @@ -260,6 +285,14 @@ return root +def _update_trie(index, root, last_rev): +"""consume""" +
D7886: nodemap: introduce append-only incremental update of the persistent data
marmoute added a comment. marmoute updated this revision to Diff 19898. rebase to latest default REPOSITORY rHG Mercurial CHANGES SINCE LAST UPDATE https://phab.mercurial-scm.org/D7886?vs=19839&id=19898 CHANGES SINCE LAST ACTION https://phab.mercurial-scm.org/D7886/new/ REVISION DETAIL https://phab.mercurial-scm.org/D7886 AFFECTED FILES mercurial/pure/parsers.py mercurial/revlogutils/nodemap.py tests/test-persistent-nodemap.t CHANGE DETAILS diff --git a/tests/test-persistent-nodemap.t b/tests/test-persistent-nodemap.t --- a/tests/test-persistent-nodemap.t +++ b/tests/test-persistent-nodemap.t @@ -49,8 +49,19 @@ $ hg ci -m 'foo' $ f --size .hg/store/00changelog.n .hg/store/00changelog.n: size=18 + +(The pure code use the debug code that perform incremental update, the C code reencode from scratch) + +#if pure + $ f --sha256 .hg/store/00changelog-*.nd --size + .hg/store/00changelog-.nd: size=123072, sha256=136472751566c8198ff09e306a7d2f9bd18bd32298d614752b73da4d6df23340 (glob) + +#else $ f --sha256 .hg/store/00changelog-*.nd --size .hg/store/00changelog-.nd: size=122880, sha256=bfafebd751c4f6d116a76a37a1dee2a251747affe7efbcc4f4842ccc746d4db9 (glob) + +#endif + $ hg debugnodemap --check revision in index: 5002 revision in nodemap: 5002 diff --git a/mercurial/revlogutils/nodemap.py b/mercurial/revlogutils/nodemap.py --- a/mercurial/revlogutils/nodemap.py +++ b/mercurial/revlogutils/nodemap.py @@ -69,12 +69,41 @@ if revlog.nodemap_file is None: msg = "calling persist nodemap on a revlog without the feature enableb" raise error.ProgrammingError(msg) -if util.safehasattr(revlog.index, "nodemap_data_all"): -data = revlog.index.nodemap_data_all() + +can_incremental = util.safehasattr(revlog.index, "nodemap_data_incremental") +ondisk_docket = revlog._nodemap_docket + +# first attemp an incremental update of the data +if can_incremental and ondisk_docket is not None: +target_docket = revlog._nodemap_docket.copy() +data = revlog.index.nodemap_data_incremental() +datafile = 
_rawdata_filepath(revlog, target_docket) +# EXP-TODO: if this is a cache, this should use a cache vfs, not a +# store vfs +with revlog.opener(datafile, b'a') as fd: +fd.write(data) else: -data = persistent_data(revlog.index) -target_docket = NodeMapDocket() -datafile = _rawdata_filepath(revlog, target_docket) +# otherwise fallback to a full new export +target_docket = NodeMapDocket() +datafile = _rawdata_filepath(revlog, target_docket) +if util.safehasattr(revlog.index, "nodemap_data_all"): +data = revlog.index.nodemap_data_all() +else: +data = persistent_data(revlog.index) +# EXP-TODO: if this is a cache, this should use a cache vfs, not a +# store vfs +with revlog.opener(datafile, b'w') as fd: +fd.write(data) +# EXP-TODO: if this is a cache, this should use a cache vfs, not a +# store vfs +with revlog.opener(revlog.nodemap_file, b'w', atomictemp=True) as fp: +fp.write(target_docket.serialize()) +revlog._nodemap_docket = target_docket +# EXP-TODO: if the transaction abort, we should remove the new data and +# reinstall the old one. + +# search for old index file in all cases, some older process might have +# left one behind. olds = _other_rawdata_filepath(revlog, target_docket) if olds: realvfs = getattr(revlog, '_realopener', revlog.opener) @@ -85,17 +114,6 @@ callback_id = b"revlog-cleanup-nodemap-%s" % revlog.nodemap_file tr.addpostclose(callback_id, cleanup) -# EXP-TODO: if this is a cache, this should use a cache vfs, not a -# store vfs -with revlog.opener(datafile, b'w') as fd: -fd.write(data) -# EXP-TODO: if this is a cache, this should use a cache vfs, not a -# store vfs -with revlog.opener(revlog.nodemap_file, b'w', atomictemp=True) as fp: -fp.write(target_docket.serialize()) -revlog._nodemap_docket = target_docket -# EXP-TODO: if the transaction abort, we should remove the new data and -# reinstall the old one. 
### Nodemap docket file @@ -208,6 +226,13 @@ return _persist_trie(trie) +def update_persistent_data(index, root, max_idx, last_rev): +"""return the incremental update for persistent nodemap from a given index +""" +trie = _update_trie(index, root, last_rev) +return _persist_trie(trie, existing_idx=max_idx) + + S_BLOCK = struct.Struct(">" + ("l" * 16)) NO_ENTRY = -1 @@ -260,6 +285,14 @@ return root +def _update_trie(index, root, last_rev): +"""consume""" +for rev in range(last_rev + 1, len(index)): +hex = nodemod.hex(index[rev][7]) +_insert_into_block(index, 0, root, rev, hex) +return root + + def _insert_into_block(in
D7886: nodemap: introduce append-only incremental update of the persistent data
marmoute added a comment. marmoute updated this revision to Diff 19839. small doc update on .#s[1] REPOSITORY rHG Mercurial CHANGES SINCE LAST UPDATE https://phab.mercurial-scm.org/D7886?vs=19795&id=19839 CHANGES SINCE LAST ACTION https://phab.mercurial-scm.org/D7886/new/ REVISION DETAIL https://phab.mercurial-scm.org/D7886 AFFECTED FILES mercurial/pure/parsers.py mercurial/revlogutils/nodemap.py tests/test-persistent-nodemap.t CHANGE DETAILS diff --git a/tests/test-persistent-nodemap.t b/tests/test-persistent-nodemap.t --- a/tests/test-persistent-nodemap.t +++ b/tests/test-persistent-nodemap.t @@ -49,8 +49,19 @@ $ hg ci -m 'foo' $ f --size .hg/store/00changelog.n .hg/store/00changelog.n: size=18 + +(The pure code use the debug code that perform incremental update, the C code reencode from scratch) + +#if pure + $ f --sha256 .hg/store/00changelog-*.nd --size + .hg/store/00changelog-.nd: size=246144, sha256=c0498fb1a78a5776978427bacd92477766c2182f738fbb0125d8a05e6112d43a (glob) + +#else $ f --sha256 .hg/store/00changelog-*.nd --size .hg/store/00changelog-.nd: size=245760, sha256=e6ee5d59afaab2cb1afae1077715be280578d29df508bd3dd9d74a994bc555e7 (glob) + +#endif + $ hg debugnodemap --check revision in index: 5002 revision in nodemap: 5002 diff --git a/mercurial/revlogutils/nodemap.py b/mercurial/revlogutils/nodemap.py --- a/mercurial/revlogutils/nodemap.py +++ b/mercurial/revlogutils/nodemap.py @@ -69,12 +69,41 @@ if revlog.nodemap_file is None: msg = "calling persist nodemap on a revlog without the feature enableb" raise error.ProgrammingError(msg) -if util.safehasattr(revlog.index, "nodemap_data_all"): -data = revlog.index.nodemap_data_all() + +can_incremental = util.safehasattr(revlog.index, "nodemap_data_incremental") +ondisk_docket = revlog._nodemap_docket + +# first attemp an incremental update of the data +if can_incremental and ondisk_docket is not None: +target_docket = revlog._nodemap_docket.copy() +data = revlog.index.nodemap_data_incremental() +datafile = 
_rawdata_filepath(revlog, target_docket) +# EXP-TODO: if this is a cache, this should use a cache vfs, not a +# store vfs +with revlog.opener(datafile, b'a') as fd: +fd.write(data) else: -data = persistent_data(revlog.index) -target_docket = NodeMapDocket() -datafile = _rawdata_filepath(revlog, target_docket) +# otherwise fallback to a full new export +target_docket = NodeMapDocket() +datafile = _rawdata_filepath(revlog, target_docket) +if util.safehasattr(revlog.index, "nodemap_data_all"): +data = revlog.index.nodemap_data_all() +else: +data = persistent_data(revlog.index) +# EXP-TODO: if this is a cache, this should use a cache vfs, not a +# store vfs +with revlog.opener(datafile, b'w') as fd: +fd.write(data) +# EXP-TODO: if this is a cache, this should use a cache vfs, not a +# store vfs +with revlog.opener(revlog.nodemap_file, b'w', atomictemp=True) as fp: +fp.write(target_docket.serialize()) +revlog._nodemap_docket = target_docket +# EXP-TODO: if the transaction abort, we should remove the new data and +# reinstall the old one. + +# search for old index file in all cases, some older process might have +# left one behind. olds = _other_rawdata_filepath(revlog, target_docket) if olds: realvfs = getattr(revlog, '_realopener', revlog.opener) @@ -85,17 +114,6 @@ callback_id = b"revlog-cleanup-nodemap-%s" % revlog.nodemap_file tr.addpostclose(callback_id, cleanup) -# EXP-TODO: if this is a cache, this should use a cache vfs, not a -# store vfs -with revlog.opener(datafile, b'w') as fd: -fd.write(data) -# EXP-TODO: if this is a cache, this should use a cache vfs, not a -# store vfs -with revlog.opener(revlog.nodemap_file, b'w', atomictemp=True) as fp: -fp.write(target_docket.serialize()) -revlog._nodemap_docket = target_docket -# EXP-TODO: if the transaction abort, we should remove the new data and -# reinstall the old one. 
### Nodemap docket file @@ -208,6 +226,13 @@ return _persist_trie(trie) +def update_persistent_data(index, root, max_idx, last_rev): +"""return the incremental update for persistent nodemap from a given index +""" +trie = _update_trie(index, root, last_rev) +return _persist_trie(trie, existing_idx=max_idx) + + S_BLOCK = struct.Struct(">" + ("q" * 16)) NO_ENTRY = -1 @@ -260,6 +285,14 @@ return root +def _update_trie(index, root, last_rev): +"""consume""" +for rev in range(last_rev + 1, len(index)): +hex = nodemod.hex(index[rev][7]) +_insert_into_block(index, 0, root, rev, hex) +return root + + def _insert_into_block(
D7886: nodemap: introduce append-only incremental update of the persistent data
marmoute updated this revision to Diff 19795. REPOSITORY rHG Mercurial CHANGES SINCE LAST UPDATE https://phab.mercurial-scm.org/D7886?vs=19767&id=19795 CHANGES SINCE LAST ACTION https://phab.mercurial-scm.org/D7886/new/ REVISION DETAIL https://phab.mercurial-scm.org/D7886 AFFECTED FILES mercurial/pure/parsers.py mercurial/revlogutils/nodemap.py tests/test-persistent-nodemap.t CHANGE DETAILS diff --git a/tests/test-persistent-nodemap.t b/tests/test-persistent-nodemap.t --- a/tests/test-persistent-nodemap.t +++ b/tests/test-persistent-nodemap.t @@ -49,8 +49,19 @@ $ hg ci -m 'foo' $ f --size .hg/store/00changelog.n .hg/store/00changelog.n: size=18 + +(The pure code use the debug code that perform incremental update, the C code reencode from scratch) + +#if pure + $ f --sha256 .hg/store/00changelog-*.nd --size + .hg/store/00changelog-.nd: size=246144, sha256=c0498fb1a78a5776978427bacd92477766c2182f738fbb0125d8a05e6112d43a (glob) + +#else $ f --sha256 .hg/store/00changelog-*.nd --size .hg/store/00changelog-.nd: size=245760, sha256=e6ee5d59afaab2cb1afae1077715be280578d29df508bd3dd9d74a994bc555e7 (glob) + +#endif + $ hg debugnodemap --check revision in index: 5002 revision in nodemap: 5002 diff --git a/mercurial/revlogutils/nodemap.py b/mercurial/revlogutils/nodemap.py --- a/mercurial/revlogutils/nodemap.py +++ b/mercurial/revlogutils/nodemap.py @@ -69,12 +69,41 @@ if revlog.nodemap_file is None: msg = "calling persist nodemap on a revlog without the feature enableb" raise error.ProgrammingError(msg) -if util.safehasattr(revlog.index, "nodemap_data_all"): -data = revlog.index.nodemap_data_all() + +can_incremental = util.safehasattr(revlog.index, "nodemap_data_incremental") +ondisk_docket = revlog._nodemap_docket + +# first attemp an incremental update of the data +if can_incremental and ondisk_docket is not None: +target_docket = revlog._nodemap_docket.copy() +data = revlog.index.nodemap_data_incremental() +datafile = _rawdata_filepath(revlog, target_docket) +# EXP-TODO: 
if this is a cache, this should use a cache vfs, not a +# store vfs +with revlog.opener(datafile, b'a') as fd: +fd.write(data) else: -data = persistent_data(revlog.index) -target_docket = NodeMapDocket() -datafile = _rawdata_filepath(revlog, target_docket) +# otherwise fallback to a full new export +target_docket = NodeMapDocket() +datafile = _rawdata_filepath(revlog, target_docket) +if util.safehasattr(revlog.index, "nodemap_data_all"): +data = revlog.index.nodemap_data_all() +else: +data = persistent_data(revlog.index) +# EXP-TODO: if this is a cache, this should use a cache vfs, not a +# store vfs +with revlog.opener(datafile, b'w') as fd: +fd.write(data) +# EXP-TODO: if this is a cache, this should use a cache vfs, not a +# store vfs +with revlog.opener(revlog.nodemap_file, b'w', atomictemp=True) as fp: +fp.write(target_docket.serialize()) +revlog._nodemap_docket = target_docket +# EXP-TODO: if the transaction abort, we should remove the new data and +# reinstall the old one. + +# search for old index file in all cases, some older process might have +# left one behind. olds = _other_rawdata_filepath(revlog, target_docket) if olds: realvfs = getattr(revlog, '_realopener', revlog.opener) @@ -85,17 +114,6 @@ callback_id = b"revlog-cleanup-nodemap-%s" % revlog.nodemap_file tr.addpostclose(callback_id, cleanup) -# EXP-TODO: if this is a cache, this should use a cache vfs, not a -# store vfs -with revlog.opener(datafile, b'w') as fd: -fd.write(data) -# EXP-TODO: if this is a cache, this should use a cache vfs, not a -# store vfs -with revlog.opener(revlog.nodemap_file, b'w', atomictemp=True) as fp: -fp.write(target_docket.serialize()) -revlog._nodemap_docket = target_docket -# EXP-TODO: if the transaction abort, we should remove the new data and -# reinstall the old one. 
### Nodemap docket file @@ -208,6 +226,13 @@ return _persist_trie(trie) +def update_persistent_data(index, root, max_idx, last_rev): +"""return the incremental update for persistent nodemap from a given index +""" +trie = _update_trie(index, root, last_rev) +return _persist_trie(trie, existing_idx=max_idx) + + S_BLOCK = struct.Struct(">" + ("q" * 16)) NO_ENTRY = -1 @@ -259,6 +284,14 @@ return root +def _update_trie(index, root, last_rev): +"""consume""" +for rev in range(last_rev + 1, len(index)): +hex = nodemod.hex(index[rev][7]) +_insert_into_block(index, 0, root, rev, hex) +return root + + def _insert_into_block(index, level, block, current_rev, current_hex): """i
D7886: nodemap: introduce append-only incremental update of the persistent data
marmoute updated this revision to Diff 19767. REPOSITORY rHG Mercurial CHANGES SINCE LAST UPDATE https://phab.mercurial-scm.org/D7886?vs=19304&id=19767 CHANGES SINCE LAST ACTION https://phab.mercurial-scm.org/D7886/new/ REVISION DETAIL https://phab.mercurial-scm.org/D7886 AFFECTED FILES mercurial/pure/parsers.py mercurial/revlogutils/nodemap.py tests/test-persistent-nodemap.t CHANGE DETAILS diff --git a/tests/test-persistent-nodemap.t b/tests/test-persistent-nodemap.t --- a/tests/test-persistent-nodemap.t +++ b/tests/test-persistent-nodemap.t @@ -49,8 +49,19 @@ $ hg ci -m 'foo' $ f --size .hg/store/00changelog.n .hg/store/00changelog.n: size=18 + +(The pure code use the debug code that perform incremental update, the C code reencode from scratch) + +#if pure + $ f --sha256 .hg/store/00changelog-*.nd --size + .hg/store/00changelog-.nd: size=246144, sha256=c0498fb1a78a5776978427bacd92477766c2182f738fbb0125d8a05e6112d43a (glob) + +#else $ f --sha256 .hg/store/00changelog-*.nd --size .hg/store/00changelog-.nd: size=245760, sha256=e6ee5d59afaab2cb1afae1077715be280578d29df508bd3dd9d74a994bc555e7 (glob) + +#endif + $ hg debugnodemap --check revision in index: 5002 revision in nodemap: 5002 diff --git a/mercurial/revlogutils/nodemap.py b/mercurial/revlogutils/nodemap.py --- a/mercurial/revlogutils/nodemap.py +++ b/mercurial/revlogutils/nodemap.py @@ -69,12 +69,41 @@ if revlog.nodemap_file is None: msg = "calling persist nodemap on a revlog without the feature enableb" raise error.ProgrammingError(msg) -if util.safehasattr(revlog.index, "nodemap_data_all"): -data = revlog.index.nodemap_data_all() + +can_incremental = util.safehasattr(revlog.index, "nodemap_data_incremental") +ondisk_docket = revlog._nodemap_docket + +# first attemp an incremental update of the data +if can_incremental and ondisk_docket is not None: +target_docket = revlog._nodemap_docket.copy() +data = revlog.index.nodemap_data_incremental() +datafile = _rawdata_filepath(revlog, target_docket) +# EXP-TODO: 
if this is a cache, this should use a cache vfs, not a +# store vfs +with revlog.opener(datafile, 'a') as fd: +fd.write(data) else: -data = persistent_data(revlog.index) -target_docket = NodeMapDocket() -datafile = _rawdata_filepath(revlog, target_docket) +# otherwise fallback to a full new export +target_docket = NodeMapDocket() +datafile = _rawdata_filepath(revlog, target_docket) +if util.safehasattr(revlog.index, "nodemap_data_all"): +data = revlog.index.nodemap_data_all() +else: +data = persistent_data(revlog.index) +# EXP-TODO: if this is a cache, this should use a cache vfs, not a +# store vfs +with revlog.opener(datafile, 'w') as fd: +fd.write(data) +# EXP-TODO: if this is a cache, this should use a cache vfs, not a +# store vfs +with revlog.opener(revlog.nodemap_file, 'w', atomictemp=True) as fp: +fp.write(target_docket.serialize()) +revlog._nodemap_docket = target_docket +# EXP-TODO: if the transaction abort, we should remove the new data and +# reinstall the old one. + +# search for old index file in all cases, some older process might have +# left one behind. olds = _other_rawdata_filepath(revlog, target_docket) if olds: realvfs = getattr(revlog, '_realopener', revlog.opener) @@ -85,17 +114,6 @@ callback_id = b"revlog-cleanup-nodemap-%s" % revlog.nodemap_file tr.addpostclose(callback_id, cleanup) -# EXP-TODO: if this is a cache, this should use a cache vfs, not a -# store vfs -with revlog.opener(datafile, 'w') as fd: -fd.write(data) -# EXP-TODO: if this is a cache, this should use a cache vfs, not a -# store vfs -with revlog.opener(revlog.nodemap_file, 'w', atomictemp=True) as fp: -fp.write(target_docket.serialize()) -revlog._nodemap_docket = target_docket -# EXP-TODO: if the transaction abort, we should remove the new data and -# reinstall the old one. 
### Nodemap docket file @@ -208,6 +226,13 @@ return _persist_trie(trie) +def update_persistent_data(index, root, max_idx, last_rev): +"""return the incremental update for persistent nodemap from a given index +""" +trie = _update_trie(index, root, last_rev) +return _persist_trie(trie, existing_idx=max_idx) + + S_BLOCK = struct.Struct(">" + ("q" * 16)) NO_ENTRY = -1 @@ -259,6 +284,14 @@ return root +def _update_trie(index, root, last_rev): +"""consume""" +for rev in range(last_rev + 1, len(index)): +hex = nodemod.hex(index[rev][7]) +_insert_into_block(index, 0, root, rev, hex) +return root + + def _insert_into_block(index, level, block, current_rev, current_hex): """insert
D7886: nodemap: introduce append-only incremental update of the persistent data
marmoute added inline comments. INLINE COMMENTS > martinvonz wrote in test-persistent-nodemap.t:53 > Which patch introduced the C code? I have not looked at all patches yet, but > I thought the idea was that this entire stack was just pure Python and then > you'd add a Rust version (not C) in a separate series. There is no C code (and none is planned). This should say `the C path` or `the cext policy`. REPOSITORY rHG Mercurial CHANGES SINCE LAST ACTION https://phab.mercurial-scm.org/D7886/new/ REVISION DETAIL https://phab.mercurial-scm.org/D7886 To: marmoute, #hg-reviewers Cc: martinvonz, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D7886: nodemap: introduce append-only incremental update of the persistent data
martinvonz added inline comments. INLINE COMMENTS > test-persistent-nodemap.t:53 > + > +(The pure code use the debug code that perform incremental update, the C > code reencode from scratch) > + Which patch introduced the C code? I have not looked at all patches yet, but I thought the idea was that this entire stack was just pure Python and then you'd add a Rust version (not C) in a separate series. REPOSITORY rHG Mercurial CHANGES SINCE LAST ACTION https://phab.mercurial-scm.org/D7886/new/ REVISION DETAIL https://phab.mercurial-scm.org/D7886 To: marmoute, #hg-reviewers Cc: martinvonz, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D7886: nodemap: introduce append-only incremental update of the persistent data
marmoute created this revision. Herald added a subscriber: mercurial-devel. Herald added a reviewer: hg-reviewers. REVISION SUMMARY Rewriting the full nodemap for each transaction has a cost we would like to avoid. We introduce a new way to write persistent nodemap data by adding new information at the end of the file. Any new or updated block is added at the end of the file. The last block is the new root node. With this method, some of the blocks already on disk get "dereferenced" and become dead data. In later changesets, we'll start tracking the amount of dead data to eventually re-generate a full nodemap. REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D7886 AFFECTED FILES mercurial/pure/parsers.py mercurial/revlogutils/nodemap.py tests/test-persistent-nodemap.t CHANGE DETAILS diff --git a/tests/test-persistent-nodemap.t b/tests/test-persistent-nodemap.t --- a/tests/test-persistent-nodemap.t +++ b/tests/test-persistent-nodemap.t @@ -49,8 +49,19 @@ $ hg ci -m 'foo' $ f --size .hg/store/00changelog.n .hg/store/00changelog.n: size=18 + +(The pure code use the debug code that perform incremental update, the C code reencode from scratch) + +#if pure + $ f --sha256 .hg/store/00changelog-*.nd --size + .hg/store/00changelog-.nd: size=246144, sha256=c0498fb1a78a5776978427bacd92477766c2182f738fbb0125d8a05e6112d43a (glob) + +#else $ f --sha256 .hg/store/00changelog-*.nd --size .hg/store/00changelog-.nd: size=245760, sha256=e6ee5d59afaab2cb1afae1077715be280578d29df508bd3dd9d74a994bc555e7 (glob) + +#endif + $ hg debugnodemap --check revision in index: 5002 revision in nodemap: 5002 diff --git a/mercurial/revlogutils/nodemap.py b/mercurial/revlogutils/nodemap.py --- a/mercurial/revlogutils/nodemap.py +++ b/mercurial/revlogutils/nodemap.py @@ -69,12 +69,41 @@ if revlog.nodemap_file is None: msg = "calling persist nodemap on a revlog without the feature enableb" raise error.ProgrammingError(msg) -if util.safehasattr(revlog.index, "nodemap_data_all"): 
-data = revlog.index.nodemap_data_all() + +can_incremental = util.safehasattr(revlog.index, "nodemap_data_incremental") +ondisk_docket = revlog._nodemap_docket + +# first attemp an incremental update of the data +if can_incremental and ondisk_docket is not None: +target_docket = revlog._nodemap_docket.copy() +data = revlog.index.nodemap_data_incremental() +datafile = _rawdata_filepath(revlog, target_docket) +# EXP-TODO: if this is a cache, this should use a cache vfs, not a +# store vfs +with revlog.opener(datafile, 'a') as fd: +fd.write(data) else: -data = persistent_data(revlog.index) -target_docket = NodeMapDocket() -datafile = _rawdata_filepath(revlog, target_docket) +# otherwise fallback to a full new export +target_docket = NodeMapDocket() +datafile = _rawdata_filepath(revlog, target_docket) +if util.safehasattr(revlog.index, "nodemap_data_all"): +data = revlog.index.nodemap_data_all() +else: +data = persistent_data(revlog.index) +# EXP-TODO: if this is a cache, this should use a cache vfs, not a +# store vfs +with revlog.opener(datafile, 'w') as fd: +fd.write(data) +# EXP-TODO: if this is a cache, this should use a cache vfs, not a +# store vfs +with revlog.opener(revlog.nodemap_file, 'w', atomictemp=True) as fp: +fp.write(target_docket.serialize()) +revlog._nodemap_docket = target_docket +# EXP-TODO: if the transaction abort, we should remove the new data and +# reinstall the old one. + +# search for old index file in all cases, some older process might have +# left one behind. 
olds = _other_rawdata_filepath(revlog, target_docket) if olds: realvfs = getattr(revlog, '_realopener', revlog.opener) @@ -85,17 +114,6 @@ callback_id = b"revlog-cleanup-nodemap-%s" % revlog.nodemap_file tr.addpostclose(callback_id, cleanup) -# EXP-TODO: if this is a cache, this should use a cache vfs, not a -# store vfs -with revlog.opener(datafile, 'w') as fd: -fd.write(data) -# EXP-TODO: if this is a cache, this should use a cache vfs, not a -# store vfs -with revlog.opener(revlog.nodemap_file, 'w', atomictemp=True) as fp: -fp.write(target_docket.serialize()) -revlog._nodemap_docket = target_docket -# EXP-TODO: if the transaction abort, we should remove the new data and -# reinstall the old one. ### Nodemap docket file @@ -208,6 +226,13 @@ return _dump_trie(trie) +def update_persistent_data(index, root, max_idx, last_rev): +"""return the serialised data of a nodemap for a given index +""" +trie = _update_trie(index, root, last_rev) +return _dump_trie(trie, existing_idx=max_idx) + + S_