Alphare created this revision.
Herald added a reviewer: indygreg.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D9846

AFFECTED FILES
  mercurial/cext/parsers.c
  mercurial/cext/revlog.c
  mercurial/revlog.py

CHANGE DETAILS

diff --git a/mercurial/revlog.py b/mercurial/revlog.py
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -69,7 +69,6 @@
     templatefilters,
     util,
 )
-from .pure import parsers as pureparsers
 from .interfaces import (
     repository,
     util as interfaceutil,
diff --git a/mercurial/cext/revlog.c b/mercurial/cext/revlog.c
--- a/mercurial/cext/revlog.c
+++ b/mercurial/cext/revlog.c
@@ -98,6 +98,7 @@
        int ntlookups;          /* # lookups */
        int ntmisses;           /* # lookups that miss the cache */
        int inlined;
+       long hdrsize; /* size of index headers. Differs in v1 vs. v2 format */
 };
 
 static Py_ssize_t index_length(const indexObject *self)
@@ -113,14 +114,19 @@
 static int index_find_node(indexObject *self, const char *node);
 
 #if LONG_MAX == 0x7fffffffL
-static const char *const tuple_format = PY23("Kiiiiiis#", "Kiiiiiiy#");
+static const char *const v1_tuple_format = PY23("Kiiiiiis#", "Kiiiiiiy#");
+static const char *const v2_tuple_format = PY23("Kiiiiiis#KiKi", "Kiiiiiiy#KiKi");
 #else
-static const char *const tuple_format = PY23("kiiiiiis#", "kiiiiiiy#");
+static const char *const v1_tuple_format = PY23("kiiiiiis#", "kiiiiiiy#");
+static const char *const v2_tuple_format = PY23("kiiiiiis#kiki", "kiiiiiiy#kiki");
 #endif
 
 /* A RevlogNG v1 index entry is 64 bytes long. */
 static const long v1_hdrsize = 64;
 
+/* A Revlogv2 index entry is 96 bytes long. */
+static const long v2_hdrsize = 96;
+
 static void raise_revlog_error(void)
 {
        PyObject *mod = NULL, *dict = NULL, *errclass = NULL;
@@ -157,7 +163,7 @@
 static const char *index_deref(indexObject *self, Py_ssize_t pos)
 {
        if (pos >= self->length)
-               return self->added + (pos - self->length) * v1_hdrsize;
+               return self->added + (pos - self->length) * self->hdrsize;
 
        if (self->inlined && pos > 0) {
                if (self->offsets == NULL) {
@@ -174,7 +180,7 @@
                return self->offsets[pos];
        }
 
-       return (const char *)(self->buf.buf) + pos * v1_hdrsize;
+       return (const char *)(self->buf.buf) + pos * self->hdrsize;
 }
 
 /*
@@ -280,8 +286,9 @@
  */
 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
 {
-       uint64_t offset_flags;
-       int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
+       uint64_t offset_flags, sidedata_offset, unified_revlog_id;
+       int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2,
+    sidedata_comp_len, rank;
        const char *c_node_id;
        const char *data;
        Py_ssize_t length = index_length(self);
@@ -320,9 +327,22 @@
        parent_2 = getbe32(data + 28);
        c_node_id = data + 32;
 
-       return Py_BuildValue(tuple_format, offset_flags, comp_len, uncomp_len,
-                            base_rev, link_rev, parent_1, parent_2, c_node_id,
-                            self->nodelen);
+  if (self->hdrsize == v1_hdrsize) {
+    return Py_BuildValue(v1_tuple_format, offset_flags, comp_len, uncomp_len,
+                         base_rev, link_rev, parent_1, parent_2, c_node_id,
+                         self->nodelen);
+  } else {
+    unified_revlog_id = getbe64(data + 64);
+    rank = getbe32(data + 72);
+    sidedata_offset = getbe64(data + 76);
+    sidedata_comp_len = getbe32(data + 84);
+
+    return Py_BuildValue(v2_tuple_format, offset_flags, comp_len,
+                         uncomp_len, base_rev, link_rev, parent_1,
+                         parent_2, c_node_id, self->nodelen,
+                         unified_revlog_id, rank,
+                         sidedata_offset, sidedata_comp_len);
+  }
 }
 
 /*
@@ -373,18 +393,30 @@
 
 static PyObject *index_append(indexObject *self, PyObject *obj)
 {
-       uint64_t offset_flags;
+       uint64_t offset_flags, unified_revlog_id, sidedata_offset;
        int rev, comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
-       Py_ssize_t c_node_id_len;
+       Py_ssize_t c_node_id_len, rank, sidedata_comp_len;
        const char *c_node_id;
        char *data;
 
-       if (!PyArg_ParseTuple(obj, tuple_format, &offset_flags, &comp_len,
-                             &uncomp_len, &base_rev, &link_rev, &parent_1,
-                             &parent_2, &c_node_id, &c_node_id_len)) {
-               PyErr_SetString(PyExc_TypeError, "8-tuple required");
-               return NULL;
+  if (self->hdrsize == v1_hdrsize) {
+    if (!PyArg_ParseTuple(obj, v1_tuple_format, &offset_flags, &comp_len,
+                          &uncomp_len, &base_rev, &link_rev, &parent_1,
+                          &parent_2, &c_node_id, &c_node_id_len)) {
+      PyErr_SetString(PyExc_TypeError, "8-tuple required");
+      return NULL;
+    }
+       } else {
+    if (!PyArg_ParseTuple(obj, v2_tuple_format, &offset_flags, &comp_len,
+                          &uncomp_len, &base_rev, &link_rev, &parent_1,
+                          &parent_2, &c_node_id, &c_node_id_len,
+                          &unified_revlog_id, &rank, &sidedata_offset,
+                          &sidedata_comp_len)) {
+      PyErr_SetString(PyExc_TypeError, "12-tuple required");
+      return NULL;
+    }
        }
+
        if (c_node_id_len != self->nodelen) {
                PyErr_SetString(PyExc_TypeError, "invalid node");
                return NULL;
@@ -394,14 +426,14 @@
                size_t new_added_length =
                    self->added_length ? self->added_length * 2 : 4096;
                void *new_added =
-                   PyMem_Realloc(self->added, new_added_length * v1_hdrsize);
+                   PyMem_Realloc(self->added, new_added_length * self->hdrsize);
                if (!new_added)
                        return PyErr_NoMemory();
                self->added = new_added;
                self->added_length = new_added_length;
        }
        rev = self->length + self->new_length;
-       data = self->added + v1_hdrsize * self->new_length++;
+       data = self->added + self->hdrsize * self->new_length++;
        putbe32(offset_flags >> 32, data);
        putbe32(offset_flags & 0xffffffffU, data + 4);
        putbe32(comp_len, data + 8);
@@ -411,7 +443,17 @@
        putbe32(parent_1, data + 24);
        putbe32(parent_2, data + 28);
        memcpy(data + 32, c_node_id, c_node_id_len);
+       /* Padding since SHA-1 is only 20 bytes for now */
        memset(data + 32 + c_node_id_len, 0, 32 - c_node_id_len);
+       if (self->hdrsize != v1_hdrsize) {
+               putbe64(unified_revlog_id, data + 64);
+               putbe32(rank, data + 72);
+               putbe64(sidedata_offset, data + 76);
+               putbe32(sidedata_comp_len, data + 84);
+         /* Zero padding up to the full 96-byte entry size */
+               memset(data + 88, 0, self->hdrsize - 88);
+       }
+
 
        if (self->ntinitialized)
                nt_insert(&self->nt, c_node_id, rev);
@@ -2563,14 +2605,17 @@
        const char *data = (const char *)self->buf.buf;
        Py_ssize_t pos = 0;
        Py_ssize_t end = self->buf.len;
-       long incr = v1_hdrsize;
+       long incr = self->hdrsize;
        Py_ssize_t len = 0;
 
-       while (pos + v1_hdrsize <= end && pos >= 0) {
-               uint32_t comp_len;
+       while (pos + self->hdrsize <= end && pos >= 0) {
+               uint32_t comp_len, sidedata_comp_len = 0;
                /* 3rd element of header is length of compressed inline data */
                comp_len = getbe32(data + pos + 8);
-               incr = v1_hdrsize + comp_len;
+               if (self->hdrsize == v2_hdrsize) {
+                       sidedata_comp_len = getbe32(data + pos + 84);
+               }
+               incr = self->hdrsize + comp_len + sidedata_comp_len;
                if (offsets)
                        offsets[len] = data + pos;
                len++;
@@ -2586,11 +2631,13 @@
        return len;
 }
 
-static int index_init(indexObject *self, PyObject *args)
+static int index_init(indexObject *self, PyObject *args, PyObject *kwargs)
 {
-       PyObject *data_obj, *inlined_obj;
+       PyObject *data_obj, *inlined_obj, *revlogv2;
        Py_ssize_t size;
 
+       static char *kwlist[] = {"data", "inlined", "revlogv2", NULL};
+
        /* Initialize before argument-checking to avoid index_dealloc() crash.
         */
        self->added = NULL;
@@ -2606,7 +2653,9 @@
        self->nodelen = 20;
        self->nullentry = NULL;
 
-       if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
+  revlogv2 = NULL;
+  if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|O", kwlist,
+                                   &data_obj, &inlined_obj, &revlogv2))
                return -1;
        if (!PyObject_CheckBuffer(data_obj)) {
                PyErr_SetString(PyExc_TypeError,
@@ -2618,8 +2667,22 @@
                return -1;
        }
 
-       self->nullentry = Py_BuildValue(PY23("iiiiiiis#", "iiiiiiiy#"), 0, 0, 0,
-                                       -1, -1, -1, -1, nullid, self->nodelen);
+       if (revlogv2 && PyObject_IsTrue(revlogv2)) {
+               self->hdrsize = v2_hdrsize;
+       } else {
+               self->hdrsize = v1_hdrsize;
+       }
+
+       if (self->hdrsize == v1_hdrsize) {
+               self->nullentry =
+                   Py_BuildValue(PY23("iiiiiiis#", "iiiiiiiy#"), 0, 0, 0, -1,
+                                 -1, -1, -1, nullid, self->nodelen);
+       } else {
+               self->nullentry =
+                   Py_BuildValue(PY23("iiiiiiis#iiii", "iiiiiiiy#iiii"), 0, 0, 0,
+                                 -1, -1, -1, -1, nullid, self->nodelen, 0, 0, 0, 0);
+       }
+
        if (!self->nullentry)
                return -1;
        PyObject_GC_UnTrack(self->nullentry);
@@ -2641,11 +2704,11 @@
                        goto bail;
                self->length = len;
        } else {
-               if (size % v1_hdrsize) {
+               if (size % self->hdrsize) {
                        PyErr_SetString(PyExc_ValueError, "corrupt index file");
                        goto bail;
                }
-               self->length = size / v1_hdrsize;
+               self->length = size / self->hdrsize;
        }
 
        return 0;
@@ -2797,16 +2860,16 @@
 };
 
 /*
- * returns a tuple of the form (index, index, cache) with elements as
+ * returns a tuple of the form (index, cache) with elements as
  * follows:
  *
- * index: an index object that lazily parses RevlogNG records
+ * index: an index object that lazily parses Revlog (v1 or v2) records
  * cache: if data is inlined, a tuple (0, index_file_content), else None
  *        index_file_content could be a string, or a buffer
  *
  * added complications are for backwards compatibility
  */
-PyObject *parse_index2(PyObject *self, PyObject *args)
+PyObject *parse_index2(PyObject *self, PyObject *args, PyObject *kwargs)
 {
        PyObject *cache = NULL;
        indexObject *idx;
@@ -2816,7 +2879,7 @@
        if (idx == NULL)
                goto bail;
 
-       ret = index_init(idx, args);
+       ret = index_init(idx, args, kwargs);
        if (ret == -1)
                goto bail;
 
diff --git a/mercurial/cext/parsers.c b/mercurial/cext/parsers.c
--- a/mercurial/cext/parsers.c
+++ b/mercurial/cext/parsers.c
@@ -638,7 +638,7 @@
 PyObject *encodedir(PyObject *self, PyObject *args);
 PyObject *pathencode(PyObject *self, PyObject *args);
 PyObject *lowerencode(PyObject *self, PyObject *args);
-PyObject *parse_index2(PyObject *self, PyObject *args);
+PyObject *parse_index2(PyObject *self, PyObject *args, PyObject *kwargs);
 
 static PyMethodDef methods[] = {
     {"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
@@ -646,7 +646,8 @@
      "create a set containing non-normal and other parent entries of given "
      "dirstate\n"},
     {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
-    {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
+    {"parse_index2", (PyCFunction)parse_index2, METH_VARARGS | METH_KEYWORDS,
+     "parse a revlog index\n"},
     {"isasciistr", isasciistr, METH_VARARGS, "check if an ASCII string\n"},
     {"asciilower", asciilower, METH_VARARGS, "lowercase an ASCII string\n"},
     {"asciiupper", asciiupper, METH_VARARGS, "uppercase an ASCII string\n"},



To: Alphare, indygreg, #hg-reviewers
Cc: mercurial-patches, mercurial-devel
_______________________________________________
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel

Reply via email to