marmoute created this revision. Herald added a reviewer: indygreg. Herald added a reviewer: hg-reviewers. Herald added a subscriber: mercurial-patches.
REVISION SUMMARY The `revlogio` class is mostly a relic from the past. Once in charge of the full revlog-related Input/Output code, that class gradually lost responsibilities to the point where more IO is now done by `revlog.index` objects or revlog objects themselves. I would like to ultimately remove the `revlogio` class; to do so I start simple by moving the "entry size" information onto the index. (The index is already responsible for the binary unpacking, so it knows the size.) REPOSITORY rHG Mercurial BRANCH default REVISION DETAIL https://phab.mercurial-scm.org/D10309 AFFECTED FILES contrib/perf.py mercurial/cext/revlog.c mercurial/pure/parsers.py mercurial/revlog.py rust/hg-cpython/src/revlog.rs CHANGE DETAILS diff --git a/rust/hg-cpython/src/revlog.rs b/rust/hg-cpython/src/revlog.rs --- a/rust/hg-cpython/src/revlog.rs +++ b/rust/hg-cpython/src/revlog.rs @@ -12,8 +12,8 @@ use cpython::{ buffer::{Element, PyBuffer}, exc::{IndexError, ValueError}, - ObjectProtocol, PyBytes, PyClone, PyDict, PyErr, PyModule, PyObject, - PyResult, PyString, PyTuple, Python, PythonObject, ToPyObject, + ObjectProtocol, PyBytes, PyClone, PyDict, PyErr, PyInt, PyModule, + PyObject, PyResult, PyString, PyTuple, Python, PythonObject, ToPyObject, }; use hg::{ nodemap::{Block, NodeMapError, NodeTree}, @@ -285,6 +285,10 @@ self.inner_update_nodemap_data(py, docket, nm_data) } + @property + def entry_size(&self) -> PyResult<PyInt> { + self.cindex(py).borrow().inner().getattr(py, "entry_size")?.extract::<PyInt>(py) + } }); diff --git a/mercurial/revlog.py b/mercurial/revlog.py --- a/mercurial/revlog.py +++ b/mercurial/revlog.py @@ -221,6 +221,8 @@ class revlogoldindex(list): + entry_size = INDEX_ENTRY_V0.size + @property def nodemap(self): msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]" @@ -272,11 +274,8 @@ class revlogoldio(object): - def __init__(self): - self.size = INDEX_ENTRY_V0.size - def parseindex(self, data, inline): - s = self.size + s = INDEX_ENTRY_V0.size 
index = [] nodemap = nodemaputil.NodeMap({nullid: nullrev}) n = off = 0 @@ -333,9 +332,6 @@ class revlogio(object): - def __init__(self): - self.size = INDEX_ENTRY_V1.size - def parseindex(self, data, inline): # call the C implementation to parse the index data index, cache = parsers.parse_index2(data, inline) @@ -349,9 +345,6 @@ class revlogv2io(object): - def __init__(self): - self.size = INDEX_ENTRY_V2.size - def parseindex(self, data, inline): index, cache = parsers.parse_index2(data, inline, revlogv2=True) return index, cache @@ -1715,8 +1708,8 @@ end = int(iend[0] >> 16) + iend[1] if self._inline: - start += (startrev + 1) * self._io.size - end += (endrev + 1) * self._io.size + start += (startrev + 1) * self.index.entry_size + end += (endrev + 1) * self.index.entry_size length = end - start return start, self._getsegment(start, length, df=df) @@ -1750,7 +1743,7 @@ start = self.start length = self.length inline = self._inline - iosize = self._io.size + iosize = self.index.entry_size buffer = util.buffer l = [] @@ -1978,7 +1971,7 @@ sidedata_size = index_entry[9] if self._inline: - sidedata_offset += self._io.size * (1 + rev) + sidedata_offset += self.index.entry_size * (1 + rev) if sidedata_size == 0: return {} @@ -2078,7 +2071,7 @@ # the temp file replace the real index when we exit the context # manager - tr.replace(self.indexfile, trindex * self._io.size) + tr.replace(self.indexfile, trindex * self.index.entry_size) nodemaputil.setup_persistent_nodemap(tr, self) self._chunkclear() @@ -2332,12 +2325,12 @@ # offset is "as if" it were in the .d file, so we need to add on # the size of the entry metadata. self._concurrencychecker( - ifh, self.indexfile, offset + curr * self._io.size + ifh, self.indexfile, offset + curr * self.index.entry_size ) else: # Entries in the .i are a consistent size. 
self._concurrencychecker( - ifh, self.indexfile, curr * self._io.size + ifh, self.indexfile, curr * self.index.entry_size ) self._concurrencychecker(dfh, self.datafile, offset) @@ -2461,7 +2454,7 @@ dfh.write(sidedata) ifh.write(entry) else: - offset += curr * self._io.size + offset += curr * self.index.entry_size transaction.add(self.indexfile, offset) ifh.write(entry) ifh.write(data[0]) @@ -2499,7 +2492,7 @@ if r: end = self.end(r - 1) ifh = self._indexfp(b"a+") - isize = r * self._io.size + isize = r * self.index.entry_size if self._inline: transaction.add(self.indexfile, end + isize) dfh = None @@ -2655,9 +2648,9 @@ end = self.start(rev) if not self._inline: transaction.add(self.datafile, end) - end = rev * self._io.size + end = rev * self.index.entry_size else: - end += rev * self._io.size + end += rev * self.index.entry_size transaction.add(self.indexfile, end) @@ -2696,7 +2689,7 @@ f.seek(0, io.SEEK_END) actual = f.tell() f.close() - s = self._io.size + s = self.index.entry_size i = max(0, actual // s) di = actual - (i * s) if self._inline: @@ -3238,7 +3231,7 @@ # rewrite the new index entries with self._indexfp(b'w+') as fp: - fp.seek(startrev * self._io.size) + fp.seek(startrev * self.index.entry_size) for i, entry in enumerate(new_entries): rev = startrev + i self.index.replace_sidedata_info(rev, entry[8], entry[9]) diff --git a/mercurial/pure/parsers.py b/mercurial/pure/parsers.py --- a/mercurial/pure/parsers.py +++ b/mercurial/pure/parsers.py @@ -49,11 +49,13 @@ big_int_size = struct.calcsize(b'>Q') # Size of a C long int, platform independent int_size = struct.calcsize(b'>i') - # Size of the entire index format - index_size = revlog_constants.INDEX_ENTRY_V1.size # An empty index entry, used as a default value to be overridden, or nullrev null_item = (0, 0, 0, -1, -1, -1, -1, nullid) + @util.propertycache + def entry_size(self): + return self.index_format.size + @property def nodemap(self): msg = b"index.nodemap is deprecated, use 
index.[has_node|rev|get_rev]" @@ -116,7 +118,7 @@ data = self._extra[i - self._lgt] else: index = self._calculate_index(i) - data = self._data[index : index + self.index_size] + data = self._data[index : index + self.entry_size] r = self.index_format.unpack(data) if self._lgt and i == 0: r = (offset_type(0, gettype(r[0])),) + r[1:] @@ -125,13 +127,13 @@ class IndexObject(BaseIndexObject): def __init__(self, data): - assert len(data) % self.index_size == 0 + assert len(data) % self.entry_size == 0 self._data = data - self._lgt = len(data) // self.index_size + self._lgt = len(data) // self.entry_size self._extra = [] def _calculate_index(self, i): - return i * self.index_size + return i * self.entry_size def __delitem__(self, i): if not isinstance(i, slice) or not i.stop == -1 or i.step is not None: @@ -140,7 +142,7 @@ self._check_index(i) self._stripnodes(i) if i < self._lgt: - self._data = self._data[: i * self.index_size] + self._data = self._data[: i * self.entry_size] self._lgt = i self._extra = [] else: @@ -203,7 +205,7 @@ if lgt is not None: self._offsets = [0] * lgt count = 0 - while off <= len(self._data) - self.index_size: + while off <= len(self._data) - self.entry_size: start = off + self.big_int_size (s,) = struct.unpack( b'>i', @@ -212,7 +214,7 @@ if lgt is not None: self._offsets[count] = off count += 1 - off += self.index_size + s + off += self.entry_size + s if off != len(self._data): raise ValueError(b"corrupted data") return count @@ -244,7 +246,6 @@ class Index2Mixin(object): index_format = revlog_constants.INDEX_ENTRY_V2 - index_size = revlog_constants.INDEX_ENTRY_V2.size null_item = (0, 0, 0, -1, -1, -1, -1, nullid, 0, 0) def replace_sidedata_info(self, i, sidedata_offset, sidedata_length): @@ -280,7 +281,7 @@ if lgt is not None: self._offsets = [0] * lgt count = 0 - while off <= len(self._data) - self.index_size: + while off <= len(self._data) - self.entry_size: start = off + self.big_int_size (data_size,) = struct.unpack( b'>i', @@ -293,7 
+294,7 @@ if lgt is not None: self._offsets[count] = off count += 1 - off += self.index_size + data_size + side_data_size + off += self.entry_size + data_size + side_data_size if off != len(self._data): raise ValueError(b"corrupted data") return count diff --git a/mercurial/cext/revlog.c b/mercurial/cext/revlog.c --- a/mercurial/cext/revlog.c +++ b/mercurial/cext/revlog.c @@ -15,6 +15,7 @@ #include <stddef.h> #include <stdlib.h> #include <string.h> +#include <structmember.h> #include "bitmanipulation.h" #include "charencode.h" @@ -2867,6 +2868,12 @@ {NULL} /* Sentinel */ }; +static PyMemberDef index_members[] = { + {"entry_size", T_LONG, offsetof(indexObject, hdrsize), 0, + "size of an index entry"}, + {NULL} /* Sentinel */ +}; + PyTypeObject HgRevlogIndex_Type = { PyVarObject_HEAD_INIT(NULL, 0) /* header */ "parsers.index", /* tp_name */ @@ -2896,7 +2903,7 @@ 0, /* tp_iter */ 0, /* tp_iternext */ index_methods, /* tp_methods */ - 0, /* tp_members */ + index_members, /* tp_members */ index_getset, /* tp_getset */ 0, /* tp_base */ 0, /* tp_dict */ diff --git a/contrib/perf.py b/contrib/perf.py --- a/contrib/perf.py +++ b/contrib/perf.py @@ -3222,7 +3222,10 @@ start = r.start length = r.length inline = r._inline - iosize = r._io.size + try: + iosize = r.index.entry_size + except AttributeError: + iosize = r._io.size buffer = util.buffer chunks = [] To: marmoute, indygreg, #hg-reviewers Cc: mercurial-patches, mercurial-devel _______________________________________________ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel