D1769: cext: use dedicated type for index entries
This revision now requires changes to proceed. baymax added a comment. baymax requested changes to this revision. There seems to have been no activities on this Diff for the past 3 Months. By policy, we are automatically moving it out of the `need-review` state. Please, move it back to `need-review` without hesitation if this diff should still be discussed. :baymax:need-review-idle: REPOSITORY rHG Mercurial CHANGES SINCE LAST ACTION https://phab.mercurial-scm.org/D1769/new/ REVISION DETAIL https://phab.mercurial-scm.org/D1769 To: indygreg, #hg-reviewers, yuja, durin42, baymax Cc: yuja, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D1769: cext: use dedicated type for index entries
indygreg updated this revision to Diff 4738. indygreg edited the summary of this revision. REPOSITORY rHG Mercurial CHANGES SINCE LAST UPDATE https://phab.mercurial-scm.org/D1769?vs=4630=4738 REVISION DETAIL https://phab.mercurial-scm.org/D1769 AFFECTED FILES mercurial/cext/parsers.c mercurial/cext/revlog.c mercurial/policy.py CHANGE DETAILS diff --git a/mercurial/policy.py b/mercurial/policy.py --- a/mercurial/policy.py +++ b/mercurial/policy.py @@ -75,7 +75,7 @@ (r'cext', r'diffhelpers'): 1, (r'cext', r'mpatch'): 1, (r'cext', r'osutil'): 3, -(r'cext', r'parsers'): 5, +(r'cext', r'parsers'): 6, } # map import request to other package or module diff --git a/mercurial/cext/revlog.c b/mercurial/cext/revlog.c --- a/mercurial/cext/revlog.c +++ b/mercurial/cext/revlog.c @@ -133,10 +133,22 @@ int *ps, int maxrev) { if (rev >= self->length - 1) { - PyObject *tuple = PyList_GET_ITEM(self->added, + PyObject *value; + PyObject *entry = PyList_GET_ITEM(self->added, rev - self->length + 1); - ps[0] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 5)); - ps[1] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 6)); + value = PyObject_GetAttrString(entry, "p1rev"); + if (!value) { + return -1; + } + ps[0] = (int)PyInt_AS_LONG(value); + Py_DECREF(value); + + value = PyObject_GetAttrString(entry, "p2rev"); + if (!value) { + return -1; + } + ps[1] = (int)PyInt_AS_LONG(value); + Py_DECREF(value); } else { const char *data = index_deref(self, rev); ps[0] = getbe32(data + 24); @@ -224,9 +236,9 @@ parent_2 = getbe32(data + 28); c_node_id = data + 32; - entry = Py_BuildValue(index_entry_format, offset_flags, comp_len, - uncomp_len, base_rev, link_rev, - parent_1, parent_2, c_node_id, 20); + entry = PyObject_CallFunction(self->entrytype, index_entry_format, + offset_flags, comp_len, uncomp_len, base_rev, + link_rev, parent_1, parent_2, c_node_id, 20); if (entry) { PyObject_GC_UnTrack(entry); @@ -253,10 +265,16 @@ return NULL; if (pos >= self->length - 1) { - PyObject *tuple, *str; - tuple = PyList_GET_ITEM(self->added, pos - self->length + 1); - str = PyTuple_GetItem(tuple, 7); - return str ? PyBytes_AS_STRING(str) : NULL; + PyObject *entry, *str; + char *result; + entry = PyList_GET_ITEM(self->added, pos - self->length + 1); + str = PyObject_GetAttrString(entry, "node"); + if (!str) { + return NULL; + } + result = PyBytes_AS_STRING(str); + Py_DECREF(str); + return result; } data = index_deref(self, pos); @@ -277,21 +295,46 @@ static PyObject *index_insert(indexObject *self, PyObject *args) { - PyObject *obj; + PyObject *obj, *nodeobj; + PyObject *result = NULL; + PyObject *tmpobj = NULL; char *node; int index; Py_ssize_t len, nodelen; if (!PyArg_ParseTuple(args, "iO", , )) return NULL; - if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) { - PyErr_SetString(PyExc_TypeError, "8-tuple required"); - return NULL; + /* Legacy callers may pass tuples. Convert to an index entry until +* API compatibility is dropped. */ + if (PyTuple_Check(obj)) { + tmpobj = PyObject_Call(self->entrytype, obj, NULL); + if (!tmpobj) { + return NULL; + } + + /* We own a reference to tmpobj. So any return after here + needs to decrease its refcount. */ + obj = tmpobj; } - if (node_check(PyTuple_GET_ITEM(obj, 7), , ) == -1) - return NULL; + if (!PyObject_TypeCheck(obj, (PyTypeObject *)self->entrytype)) { + PyErr_SetString(PyExc_TypeError, "IndexV1Entry type required"); + goto cleanup; + } + + nodeobj = PyObject_GetAttrString(obj, "node"); + if (!nodeobj) { + PyErr_SetString(PyExc_ValueError, "could not retrieve node"); + goto cleanup; + } + if (node_check(nodeobj, , ) == -1) { + Py_DECREF(nodeobj); + goto cleanup; + } + + Py_DECREF(nodeobj); + nodeobj = NULL; len = index_length(self); @@ -301,23 +344,30 @@ if
D1769: cext: use dedicated type for index entries
yuja requested changes to this revision. yuja added inline comments. This revision now requires changes to proceed. INLINE COMMENTS > revlog.c:141 > + if (!value) { > + PyErr_SetString(PyExc_ValueError, > + "could not retrieve p1rev"); Perhaps AttributeError would be set by `PyObject_GetAttrString()`. > revlog.c:321 > +needs to decrease its refcount. */ > + obj = tmpobj; > } This is tricky, but looks okay. Alternatively, we could inc/decref borrowed `obj`. > revlog.c:327 > + result = NULL; > + goto cleanup; > + } I slightly prefer `goto bail` instead of `result = NULL; goto cleanup` pairs. > revlog.c:1788 > + PyErr_SetString(PyExc_ValueError, "could not get node"); > + return; > + } No error reported to the caller. AttributeError would be set automatically. REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D1769 To: indygreg, #hg-reviewers, yuja Cc: yuja, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D1769: cext: use dedicated type for index entries
indygreg created this revision. Herald added a subscriber: mercurial-devel. Herald added a reviewer: hg-reviewers. REVISION SUMMARY Now that we have a handle on our type to represent revlog index entries, let's use it. This commit essentially consists of porting code from PyTuple to PyObject. As part of porting the code, we now retrieve elements in the entry type by named attribute instead of integer index. Before, PyTuple_* APIs allowed us to retrieve a borrowed reference to PyObject elements. We can no longer do this via the PyObject_* APIs for a type not implemented in C. This required extra refcount manipulation in various places. The C extension is now emitting the new type. And because various code is still accessing index entry elements via __getitem__, we see a performance regression in the Firefox repository: $ hg perfrevset '::tip' ! wall 0.672636 comb 0.67 user 0.65 sys 0.02 (best of 15) ! wall 0.962372 comb 0.96 user 0.94 sys 0.02 (best of 10) We will claw back this regression in subsequent commits by accessing fields by name instead of index. The version of the "parsers" C extension has been incremented to reflect the backwards incompatible API change. REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D1769 AFFECTED FILES mercurial/cext/parsers.c mercurial/cext/revlog.c mercurial/policy.py CHANGE DETAILS diff --git a/mercurial/policy.py b/mercurial/policy.py --- a/mercurial/policy.py +++ b/mercurial/policy.py @@ -75,7 +75,7 @@ (r'cext', r'diffhelpers'): 1, (r'cext', r'mpatch'): 1, (r'cext', r'osutil'): 2, -(r'cext', r'parsers'): 4, +(r'cext', r'parsers'): 5, } # map import request to other package or module diff --git a/mercurial/cext/revlog.c b/mercurial/cext/revlog.c --- a/mercurial/cext/revlog.c +++ b/mercurial/cext/revlog.c @@ -133,10 +133,26 @@ int *ps, int maxrev) { if (rev >= self->length - 1) { - PyObject *tuple = PyList_GET_ITEM(self->added, + PyObject *value; + PyObject *entry = PyList_GET_ITEM(self->added, rev - self->length + 1); - ps[0] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 5)); - ps[1] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 6)); + value = PyObject_GetAttrString(entry, "p1rev"); + if (!value) { + PyErr_SetString(PyExc_ValueError, + "could not retrieve p1rev"); + return -1; + } + ps[0] = (int)PyInt_AS_LONG(value); + Py_DECREF(value); + + value = PyObject_GetAttrString(entry, "p2rev"); + if (!value) { + PyErr_SetString(PyExc_ValueError, + "could not retrieve p2rev"); + return -1; + } + ps[1] = (int)PyInt_AS_LONG(value); + Py_DECREF(value); } else { const char *data = index_deref(self, rev); ps[0] = getbe32(data + 24); @@ -224,9 +240,9 @@ parent_2 = getbe32(data + 28); c_node_id = data + 32; - entry = Py_BuildValue(index_entry_format, offset_flags, comp_len, - uncomp_len, base_rev, link_rev, - parent_1, parent_2, c_node_id, 20); + entry = PyObject_CallFunction(self->entrytype, index_entry_format, + offset_flags, comp_len, uncomp_len, base_rev, + link_rev, parent_1, parent_2, c_node_id, 20); if (entry) { PyObject_GC_UnTrack(entry); @@ -253,10 +269,16 @@ return NULL; if (pos >= self->length - 1) { - PyObject *tuple, *str; - tuple = PyList_GET_ITEM(self->added, pos - self->length + 1); - str = PyTuple_GetItem(tuple, 7); - return str ? PyBytes_AS_STRING(str) : NULL; + PyObject *entry, *str; + char *result; + entry = PyList_GET_ITEM(self->added, pos - self->length + 1); + str = PyObject_GetAttrString(entry, "node"); + if (!str) { + return NULL; + } + result = PyBytes_AS_STRING(str); + Py_DECREF(str); + return result; } data = index_deref(self, pos); @@ -277,21 +299,48 @@ static PyObject *index_insert(indexObject *self, PyObject *args) { - PyObject *obj; + PyObject *obj, *nodeobj, *result; + PyObject *tmpobj = NULL; char *node; int index; Py_ssize_t len, nodelen; if (!PyArg_ParseTuple(args, "iO", , )) return NULL; - if (!PyTuple_Check(obj) ||