On Mon, 12 Nov 2018 10:55:47 +0100, Boris Feld wrote: > # HG changeset patch > # User Boris Feld <boris.f...@octobus.net> > # Date 1541785632 -3600 > # Fri Nov 09 18:47:12 2018 +0100 > # Node ID 4a1104eade1dfb1697517d60d2c5fd7a98b8c7f0 > # Parent 0ea42453fa491793d1e145f5093b65e84cb65e97 > # EXP-Topic sparse-perf > # Available At https://bitbucket.org/octobus/mercurial-devel/ > # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r > 4a1104eade1d > sparse-revlog: introduce native (C) implementation of slicechunktodensity
Just quickly scanned this patch, no other patches in this series nor "sparse" logic is reviewed. > +struct Gap { > + long size; > + long idx; > +}; > + > +static int gap_compare(const void *left, const void *right) > +{ > + return ((struct Gap *)left)->size - ((struct Gap *)right)->size; > +} > +static int long_compare(const void *left, const void *right) > +{ > + return *(long *)left - *(long *)right; > +} Nit: (const <type> *)<left|right> as the argument is const void*. If we're sure 'left - right' never exceeds the int range, it might be better to explicitly cast the result to (int) to silence possible warning. > +static PyObject *index_slicechunktodensity(indexObject *self, PyObject *args) > +{ > + /* method arguments */ > + PyObject *list_revs = NULL; /* revisions in the chain */ > + double targetdensity = 0.5; /* min density to achieve */ > + long mingapsize = 0; /* threshold to ignore gaps */ > + > + /* other core variables */ > + long i; /* used for various iteration */ > + PyObject *result = NULL; /* the final return of the function */ > + > + /* generic information about the delta chain being slice */ > + Py_ssize_t num_revs = 0; /* size of the full delta chain */ > + Py_ssize_t *revs = NULL; /* native array of revision in the chain */ > + long chainpayload = 0; /* sum of all delta in the chain */ > + long deltachainspan = 0; /* distance from first byte to last byte */ > + > + /* variable used for slicing the delta chain */ > + long readdata = 0; /* amount of data currently planned to be read */ > + double density = 0; /* ration of payload data compared to read ones */ > + struct Gap *gaps = NULL; /* array of notable gap in the chain */ > + long num_gaps = 0; /* total number of notable gap recorded so far */ > + Py_ssize_t *selected_indices = NULL; /* indices of gap skipped over */ > + long num_selected = 0; /* number of gaps skipped */ Maybe i, num_rgaps, num_selected should be Py_ssize_t? In general, long can be narrower than ssize_t. > + PyObject *chunk = NULL; /* individual slice */ > + PyObject *allchunks = PyList_New(num_selected); /* all slices */ Needs to make sure that allchunks isn't NULL. And it's probably better to just initialize the list with literal 0, since we have no for-loop to fill in the list items up to num_selected. > + /* parsing argument */ > + if (!PyArg_ParseTuple(args, "O!dl", &PyList_Type, &list_revs, > + &targetdensity, &mingapsize)) { > + goto bail; > + } > + > + /* If the delta chain contains a single element, we do not need slicing > + */ > + num_revs = PyList_GET_SIZE(list_revs); > + if (num_revs <= 1) { > + result = PyTuple_Pack(1, list_revs); > + goto done; > + } > + > + /* Turn the python list into a native integer array (for efficiency) */ > + revs = (Py_ssize_t *)malloc((num_revs) * sizeof(Py_ssize_t)); num_revs * sizeof(...) can overflow. Using calloc() is safer if the zeroing cost isn't significant. > + if (revs == NULL) { > + PyErr_NoMemory(); > + goto bail; > + } > + for (i = 0; i < num_revs; i++) { > + Py_ssize_t revnum = PyInt_AsLong(PyList_GET_ITEM(list_revs, i)); > + if (revnum == -1 && PyErr_Occurred()) { > + goto bail; > + } > + revs[i] = revnum; > + } Are we sure revnum is in valid range? > + > + /* Compute and check various property of the unsliced delta chain */ > + deltachainspan = index_segment_span(self, revs[0], revs[num_revs - 1]); > + > + if (deltachainspan <= mingapsize) { > + result = PyTuple_Pack(1, list_revs); > + goto done; > + } > + chainpayload = 0; > + for (i = 0; i < num_revs; i++) { > + chainpayload += index_get_length(self, revs[i]); > + } > + > + readdata = deltachainspan; > + density = 1.0; > + > + if (0 < deltachainspan) { > + density = (double)chainpayload / (double)deltachainspan; > + }; > + > + if (density >= targetdensity) { > + result = PyTuple_Pack(1, list_revs); > + goto done; > + } > + > + /* if chain is too sparse, look for relevant gaps */ > + gaps = (struct Gap *)malloc((num_revs) * sizeof(struct Gap)); This, too. > + if (gaps == NULL) { > + PyErr_NoMemory(); > + goto bail; > + } > + > + Py_ssize_t previous_end = -1; > + for (i = 0; i < num_revs; i++) { > + Py_ssize_t revstart = index_get_start(self, revs[i]); > + Py_ssize_t revsize = index_get_length(self, revs[i]); > + if (revsize <= 0) { > + continue; > + } > + if (previous_end >= 0) { > + Py_ssize_t gapsize = revstart - previous_end; > + if (gapsize > mingapsize) { > + gaps[num_gaps].size = gapsize; > + gaps[num_gaps].idx = i; > + num_gaps += 1; > + } > + } > + previous_end = revstart + revsize; > + } > + if (num_gaps == 0) { > + result = PyTuple_Pack(1, list_revs); > + goto done; > + } > + qsort(gaps, num_gaps, sizeof(struct Gap), &gap_compare); > + > + /* Slice the largest gap first, they improve the density the most */ > + selected_indices = > + (Py_ssize_t *)malloc((num_gaps + 1) * sizeof(Py_ssize_t)); > + if (selected_indices == NULL) { > + PyErr_NoMemory(); > + goto bail; > + } > + > + for (i = num_gaps - 1; i >= 0; i--) { > + selected_indices[num_selected] = gaps[i].idx; > + readdata -= gaps[i].size; > + num_selected += 1; > + if (readdata <= 0) { > + density = 1.0; > + } else { > + density = (double)chainpayload / (double)readdata; > + } > + if (density >= targetdensity) { > + break; > + } > + } > + qsort(selected_indices, num_selected, sizeof(Py_ssize_t), > + &long_compare); Py_ssize_t != long on Windows, IIRC. > + /* create the resulting slice */ > + long previdx = 0; > + selected_indices[num_selected] = num_revs; > + for (i = 0; i <= num_selected; i++) { > + long idx = selected_indices[i]; > + chunk = _trimchunk(self, list_revs, previdx, idx); > + if (chunk == NULL) { > + goto bail; > + } > + if (PyList_GET_SIZE(chunk) > 0) { > + if (PyList_Append(allchunks, chunk) == -1) { > + goto bail; > + } > + } > + Py_DECREF(chunk); > + chunk = NULL; > + previdx = idx; > + } > + result = allchunks; > + goto done; > + > +bail: > + if (allchunks != NULL) { > + Py_DECREF(allchunks); > + } > + if (chunk != NULL) { > + Py_DECREF(chunk); > + } Py_XDECREF() can be used instead of != NULL. > +done: > + if (revs != NULL) { > + free(revs); > + } > + if (gaps != NULL) { > + free(gaps); > + } > + if (selected_indices != NULL) { > + free(selected_indices); > + } Nit: free(NULL) is guaranteed to be noop. No need to check != NULL. > + if (result != NULL) { > + Py_INCREF(result); > + } Looks like an excessive incref as PyTuple_Pack() returns a new reference for example. > + return result; > +} _______________________________________________ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel