marmoute created this revision. Herald added a reviewer: indygreg. Herald added a reviewer: hg-reviewers. Herald added a subscriber: mercurial-patches.
REVISION SUMMARY This will make it possible to keep track of compression information in the revlog index, opening the way to more efficient revision restoration (in native code, but the Python usage is already defeating performance work). We start by adding a new entry to the index tuple, using a value matching the current behavior. We will introduce storage and other values in later changesets. REPOSITORY rHG Mercurial BRANCH default REVISION DETAIL https://phab.mercurial-scm.org/D10646 AFFECTED FILES mercurial/bundlerepo.py mercurial/cext/parsers.c mercurial/cext/revlog.c mercurial/policy.py mercurial/pure/parsers.py mercurial/revlog.py mercurial/revlogutils/constants.py mercurial/revlogutils/revlogv0.py mercurial/unionrepo.py tests/test-parseindex2.py CHANGE DETAILS diff --git a/tests/test-parseindex2.py b/tests/test-parseindex2.py --- a/tests/test-parseindex2.py +++ b/tests/test-parseindex2.py @@ -21,6 +21,9 @@ policy, pycompat, ) +from mercurial.revlogutils import ( + constants, +) parsers = policy.importmod('parsers') @@ -49,7 +52,7 @@ cache = (0, data) while off <= l: e = struct.unpack(indexformatng, data[off : off + s]) - e = e + (0, 0) + e = e + (0, 0, constants.COMP_MODE_INLINE) nodemap[e[7]] = n append(e) n += 1 @@ -59,7 +62,7 @@ else: while off <= l: e = struct.unpack(indexformatng, data[off : off + s]) - e = e + (0, 0) + e = e + (0, 0, constants.COMP_MODE_INLINE) nodemap[e[7]] = n append(e) n += 1 @@ -242,7 +245,19 @@ break def testminusone(self): - want = (0, 0, 0, -1, -1, -1, -1, sha1nodeconstants.nullid, 0, 0) + want = ( + 0, + 0, + 0, + -1, + -1, + -1, + -1, + sha1nodeconstants.nullid, + 0, + 0, + constants.COMP_MODE_INLINE, + ) index, junk = parsers.parse_index2(data_inlined, True) got = index[-1] self.assertEqual(want, got) # inline data @@ -264,7 +279,20 @@ # node won't matter for this test, let's just make sure # they don't collide. Other data don't matter either. node = hexrev(p1) + hexrev(p2) + b'.' 
* 12 - index.append((0, 0, 12, 1, 34, p1, p2, node, 0, 0)) + e = ( + 0, + 0, + 12, + 1, + 34, + p1, + p2, + node, + 0, + 0, + constants.COMP_MODE_INLINE, + ) + index.append(e) appendrev(4) appendrev(5) diff --git a/mercurial/unionrepo.py b/mercurial/unionrepo.py --- a/mercurial/unionrepo.py +++ b/mercurial/unionrepo.py @@ -31,6 +31,10 @@ vfs as vfsmod, ) +from .revlogutils import ( + constants as revlog_constants, +) + class unionrevlog(revlog.revlog): def __init__(self, opener, radix, revlog2, linkmapper): @@ -65,6 +69,7 @@ node, _sdo, _sds, + _dcm, ) = rev flags = _start & 0xFFFF @@ -99,6 +104,7 @@ node, 0, # sidedata offset 0, # sidedata size + revlog_constants.COMP_MODE_INLINE, ) self.index.append(e) self.bundlerevs.add(n) diff --git a/mercurial/revlogutils/revlogv0.py b/mercurial/revlogutils/revlogv0.py --- a/mercurial/revlogutils/revlogv0.py +++ b/mercurial/revlogutils/revlogv0.py @@ -9,6 +9,7 @@ from ..node import sha1nodeconstants from .constants import ( + COMP_MODE_INLINE, INDEX_ENTRY_V0, ) from ..i18n import _ @@ -42,7 +43,19 @@ class revlogoldindex(list): entry_size = INDEX_ENTRY_V0.size - null_item = (0, 0, 0, -1, -1, -1, -1, sha1nodeconstants.nullid, 0, 0) + null_item = ( + 0, + 0, + 0, + -1, + -1, + -1, + -1, + sha1nodeconstants.nullid, + 0, + 0, + COMP_MODE_INLINE, + ) @property def nodemap(self): @@ -138,6 +151,7 @@ e[6], 0, # no side data support 0, # no side data support + COMP_MODE_INLINE, ) index.append(e2) nodemap[e[6]] = n diff --git a/mercurial/revlogutils/constants.py b/mercurial/revlogutils/constants.py --- a/mercurial/revlogutils/constants.py +++ b/mercurial/revlogutils/constants.py @@ -114,6 +114,10 @@ # bitmark for flags that could cause rawdata content change REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED +## chunk compression mode constants: +# chunk use a compression stored "inline" at the start of the chunk itself. 
+COMP_MODE_INLINE = 2 + SUPPORTED_FLAGS = { REVLOGV0: REVLOGV0_FLAGS, REVLOGV1: REVLOGV1_FLAGS, @@ -152,4 +156,5 @@ }, } + SPARSE_REVLOG_MAX_CHAIN_LENGTH = 1000 diff --git a/mercurial/revlog.py b/mercurial/revlog.py --- a/mercurial/revlog.py +++ b/mercurial/revlog.py @@ -35,6 +35,7 @@ from .pycompat import getattr from .revlogutils.constants import ( ALL_KINDS, + COMP_MODE_INLINE, FEATURES_BY_VERSION, FLAG_GENERALDELTA, FLAG_INLINE_DATA, @@ -336,6 +337,10 @@ [9] sidedata chunk length: The size, in bytes, of the revision side-data chunk. + + [10] data compression mode: + two bits that details the way the data chunk is compressed on disk. + (see "COMP_MODE_*" constant for details) """ _flagserrorclass = error.RevlogError @@ -2474,6 +2479,7 @@ node, sidedata_offset, len(serialized_sidedata), + COMP_MODE_INLINE, ) self.index.append(e) diff --git a/mercurial/pure/parsers.py b/mercurial/pure/parsers.py --- a/mercurial/pure/parsers.py +++ b/mercurial/pure/parsers.py @@ -54,7 +54,19 @@ # Size of a C long int, platform independent int_size = struct.calcsize(b'>i') # An empty index entry, used as a default value to be overridden, or nullrev - null_item = (0, 0, 0, -1, -1, -1, -1, sha1nodeconstants.nullid, 0, 0) + null_item = ( + 0, + 0, + 0, + -1, + -1, + -1, + -1, + sha1nodeconstants.nullid, + 0, + 0, + revlog_constants.COMP_MODE_INLINE, + ) @util.propertycache def entry_size(self): @@ -135,7 +147,7 @@ def _unpack_entry(self, data): r = self.index_format.unpack(data) - r = r + (0, 0) + r = r + (0, 0, revlog_constants.COMP_MODE_INLINE) return r def pack_header(self, header): @@ -303,16 +315,17 @@ self._extra[rev - self._lgt] = new def _unpack_entry(self, data): - return self.index_format.unpack(data) + return self.index_format.unpack(data) + ( + revlog_constants.COMP_MODE_INLINE, + ) def _pack_entry(self, entry): - return self.index_format.pack(*entry) + return self.index_format.pack(*entry[:10]) def entry_binary(self, rev): """return the raw binary string representing a 
revision""" entry = self[rev] - p = revlog_constants.INDEX_ENTRY_V2.pack(*entry) - return p + return self._pack_entry(entry) def pack_header(self, header): """pack header information as binary""" diff --git a/mercurial/policy.py b/mercurial/policy.py --- a/mercurial/policy.py +++ b/mercurial/policy.py @@ -80,7 +80,7 @@ ('cext', 'bdiff'): 3, ('cext', 'mpatch'): 1, ('cext', 'osutil'): 4, - ('cext', 'parsers'): 18, + ('cext', 'parsers'): 19, } # map import request to other package or module diff --git a/mercurial/cext/revlog.c b/mercurial/cext/revlog.c --- a/mercurial/cext/revlog.c +++ b/mercurial/cext/revlog.c @@ -118,9 +118,9 @@ static int index_find_node(indexObject *self, const char *node); #if LONG_MAX == 0x7fffffffL -static const char *const tuple_format = PY23("Kiiiiiis#Ki", "Kiiiiiiy#Ki"); +static const char *const tuple_format = PY23("Kiiiiiis#KiB", "Kiiiiiiy#KiB"); #else -static const char *const tuple_format = PY23("kiiiiiis#ki", "kiiiiiiy#ki"); +static const char *const tuple_format = PY23("kiiiiiis#kiB", "kiiiiiiy#kiB"); #endif /* A RevlogNG v1 index entry is 64 bytes long. 
*/ @@ -132,6 +132,8 @@ static const long format_v1 = 1; /* Internal only, could be any number */ static const long format_v2 = 2; /* Internal only, could be any number */ +static const char comp_mode_inline = 2; + static void raise_revlog_error(void) { PyObject *mod = NULL, *dict = NULL, *errclass = NULL; @@ -294,6 +296,7 @@ uint64_t offset_flags, sidedata_offset; int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2, sidedata_comp_len; + char data_comp_mode; const char *c_node_id; const char *data; Py_ssize_t length = index_length(self); @@ -340,9 +343,11 @@ sidedata_comp_len = getbe32(data + 72); } + data_comp_mode = comp_mode_inline; return Py_BuildValue(tuple_format, offset_flags, comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2, c_node_id, - self->nodelen, sidedata_offset, sidedata_comp_len); + self->nodelen, sidedata_offset, sidedata_comp_len, + data_comp_mode); } /* * Pack header information in binary @@ -443,6 +448,7 @@ { uint64_t offset_flags, sidedata_offset; int rev, comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2; + char data_comp_mode; Py_ssize_t c_node_id_len, sidedata_comp_len; const char *c_node_id; char *data; @@ -450,8 +456,9 @@ if (!PyArg_ParseTuple(obj, tuple_format, &offset_flags, &comp_len, &uncomp_len, &base_rev, &link_rev, &parent_1, &parent_2, &c_node_id, &c_node_id_len, - &sidedata_offset, &sidedata_comp_len)) { - PyErr_SetString(PyExc_TypeError, "10-tuple required"); + &sidedata_offset, &sidedata_comp_len, + &data_comp_mode)) { + PyErr_SetString(PyExc_TypeError, "11-tuple required"); return NULL; } @@ -459,6 +466,12 @@ PyErr_SetString(PyExc_TypeError, "invalid node"); return NULL; } + if (data_comp_mode != comp_mode_inline) { + PyErr_Format(PyExc_ValueError, + "invalid data compression mode: %i", + data_comp_mode); + return NULL; + } if (self->new_length == self->added_length) { size_t new_added_length = @@ -2761,9 +2774,9 @@ self->entry_size = v1_entry_size; } - self->nullentry = - 
Py_BuildValue(PY23("iiiiiiis#ii", "iiiiiiiy#ii"), 0, 0, 0, -1, -1, - -1, -1, nullid, self->nodelen, 0, 0); + self->nullentry = Py_BuildValue(PY23("iiiiiiis#iiB", "iiiiiiiy#iiB"), 0, + 0, 0, -1, -1, -1, -1, nullid, + self->nodelen, 0, 0, comp_mode_inline); if (!self->nullentry) return -1; diff --git a/mercurial/cext/parsers.c b/mercurial/cext/parsers.c --- a/mercurial/cext/parsers.c +++ b/mercurial/cext/parsers.c @@ -668,7 +668,7 @@ void manifest_module_init(PyObject *mod); void revlog_module_init(PyObject *mod); -static const int version = 18; +static const int version = 19; static void module_init(PyObject *mod) { diff --git a/mercurial/bundlerepo.py b/mercurial/bundlerepo.py --- a/mercurial/bundlerepo.py +++ b/mercurial/bundlerepo.py @@ -105,6 +105,7 @@ node, 0, 0, + revlog_constants.COMP_MODE_INLINE, ) self.index.append(e) self.bundlerevs.add(n) To: marmoute, indygreg, #hg-reviewers Cc: mercurial-patches, mercurial-devel _______________________________________________ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel