D2602: bdiff: add a xdiffblocks method
This revision was automatically updated to reflect the committed changes. Closed by commit rHGd74589c85f5e: bdiff: add a xdiffblocks method (authored by quark, committed by ). REPOSITORY rHG Mercurial CHANGES SINCE LAST UPDATE https://phab.mercurial-scm.org/D2602?vs=6480=6516 REVISION DETAIL https://phab.mercurial-scm.org/D2602 AFFECTED FILES mercurial/cext/bdiff.c mercurial/policy.py setup.py CHANGE DETAILS diff --git a/setup.py b/setup.py --- a/setup.py +++ b/setup.py @@ -847,14 +847,33 @@ if sys.platform == 'darwin': osutil_ldflags += ['-framework', 'ApplicationServices'] +xdiff_srcs = [ +'mercurial/thirdparty/xdiff/xdiffi.c', +'mercurial/thirdparty/xdiff/xemit.c', +'mercurial/thirdparty/xdiff/xmerge.c', +'mercurial/thirdparty/xdiff/xprepare.c', +'mercurial/thirdparty/xdiff/xutils.c', +] + +xdiff_headers = [ +'mercurial/thirdparty/xdiff/xdiff.h', +'mercurial/thirdparty/xdiff/xdiffi.h', +'mercurial/thirdparty/xdiff/xemit.h', +'mercurial/thirdparty/xdiff/xinclude.h', +'mercurial/thirdparty/xdiff/xmacros.h', +'mercurial/thirdparty/xdiff/xprepare.h', +'mercurial/thirdparty/xdiff/xtypes.h', +'mercurial/thirdparty/xdiff/xutils.h', +] + extmodules = [ Extension('mercurial.cext.base85', ['mercurial/cext/base85.c'], include_dirs=common_include_dirs, depends=common_depends), Extension('mercurial.cext.bdiff', ['mercurial/bdiff.c', - 'mercurial/cext/bdiff.c'], + 'mercurial/cext/bdiff.c'] + xdiff_srcs, include_dirs=common_include_dirs, - depends=common_depends + ['mercurial/bdiff.h']), + depends=common_depends + ['mercurial/bdiff.h'] + xdiff_headers), Extension('mercurial.cext.diffhelpers', ['mercurial/cext/diffhelpers.c'], include_dirs=common_include_dirs, depends=common_depends), diff --git a/mercurial/policy.py b/mercurial/policy.py --- a/mercurial/policy.py +++ b/mercurial/policy.py @@ -66,7 +66,7 @@ # keep in sync with "version" in C modules _cextversions = { (r'cext', r'base85'): 1, -(r'cext', r'bdiff'): 2, +(r'cext', r'bdiff'): 3, (r'cext', r'diffhelpers'): 1, 
(r'cext', r'mpatch'): 1, (r'cext', r'osutil'): 3, diff --git a/mercurial/cext/bdiff.c b/mercurial/cext/bdiff.c --- a/mercurial/cext/bdiff.c +++ b/mercurial/cext/bdiff.c @@ -17,6 +17,7 @@ #include "bdiff.h" #include "bitmanipulation.h" +#include "thirdparty/xdiff/xdiff.h" #include "util.h" static PyObject *blocks(PyObject *self, PyObject *args) @@ -256,18 +257,78 @@ return NULL; } +static int hunk_consumer(long a1, long a2, long b1, long b2, void *priv) +{ + PyObject *rl = (PyObject *)priv; + PyObject *m = Py_BuildValue("llll", a1, a2, b1, b2); + if (!m) + return -1; + if (PyList_Append(rl, m) != 0) { + Py_DECREF(m); + return -1; + } + return 0; +} + +static PyObject *xdiffblocks(PyObject *self, PyObject *args) +{ + Py_ssize_t la, lb; + mmfile_t a, b; + PyObject *rl; + + xpparam_t xpp = { + XDF_INDENT_HEURISTIC, /* flags */ + NULL, /* anchors */ + 0,/* anchors_nr */ + }; + xdemitconf_t xecfg = { + 0, /* ctxlen */ + 0, /* interhunkctxlen */ + XDL_EMIT_BDIFFHUNK, /* flags */ + NULL, /* find_func */ + NULL, /* find_func_priv */ + hunk_consumer, /* hunk_consume_func */ + }; + xdemitcb_t ecb = { + NULL, /* priv */ + NULL, /* outf */ + }; + + if (!PyArg_ParseTuple(args, PY23("s#s#", "y#y#"), &a.ptr, &la, &b.ptr, + &lb)) + return NULL; + + a.size = la; + b.size = lb; + + rl = PyList_New(0); + if (!rl) + return PyErr_NoMemory(); + + ecb.priv = rl; + + if (xdl_diff(&a, &b, &xpp, &xecfg, &ecb) != 0) { + Py_DECREF(rl); + return PyErr_NoMemory(); + } + + return rl; +} + static char mdiff_doc[] = "Efficient binary diff."; static PyMethodDef methods[] = { {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"}, {"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"}, {"fixws", fixws, METH_VARARGS, "normalize diff whitespaces\n"}, {"splitnewlines", splitnewlines, METH_VARARGS, "like str.splitlines, but only split on newlines\n"}, +{"xdiffblocks", xdiffblocks, METH_VARARGS, + "find a list of matching lines using xdiff algorithm\n"}, {NULL, NULL}, }; -static const int version = 2; +static const int version 
= 3; #ifdef IS_PY3K static struct PyModuleDef bdiff_module = { To: quark, #hg-reviewers, indygreg, durin42 Cc: indygreg, mercurial-devel
D2602: bdiff: add a xdiffblocks method
quark updated this revision to Diff 6480. REPOSITORY rHG Mercurial CHANGES SINCE LAST UPDATE https://phab.mercurial-scm.org/D2602?vs=6476=6480 REVISION DETAIL https://phab.mercurial-scm.org/D2602 AFFECTED FILES mercurial/cext/bdiff.c mercurial/policy.py setup.py CHANGE DETAILS diff --git a/setup.py b/setup.py --- a/setup.py +++ b/setup.py @@ -847,14 +847,33 @@ if sys.platform == 'darwin': osutil_ldflags += ['-framework', 'ApplicationServices'] +xdiff_srcs = [ +'mercurial/thirdparty/xdiff/xdiffi.c', +'mercurial/thirdparty/xdiff/xemit.c', +'mercurial/thirdparty/xdiff/xmerge.c', +'mercurial/thirdparty/xdiff/xprepare.c', +'mercurial/thirdparty/xdiff/xutils.c', +] + +xdiff_headers = [ +'mercurial/thirdparty/xdiff/xdiff.h', +'mercurial/thirdparty/xdiff/xdiffi.h', +'mercurial/thirdparty/xdiff/xemit.h', +'mercurial/thirdparty/xdiff/xinclude.h', +'mercurial/thirdparty/xdiff/xmacros.h', +'mercurial/thirdparty/xdiff/xprepare.h', +'mercurial/thirdparty/xdiff/xtypes.h', +'mercurial/thirdparty/xdiff/xutils.h', +] + extmodules = [ Extension('mercurial.cext.base85', ['mercurial/cext/base85.c'], include_dirs=common_include_dirs, depends=common_depends), Extension('mercurial.cext.bdiff', ['mercurial/bdiff.c', - 'mercurial/cext/bdiff.c'], + 'mercurial/cext/bdiff.c'] + xdiff_srcs, include_dirs=common_include_dirs, - depends=common_depends + ['mercurial/bdiff.h']), + depends=common_depends + ['mercurial/bdiff.h'] + xdiff_headers), Extension('mercurial.cext.diffhelpers', ['mercurial/cext/diffhelpers.c'], include_dirs=common_include_dirs, depends=common_depends), diff --git a/mercurial/policy.py b/mercurial/policy.py --- a/mercurial/policy.py +++ b/mercurial/policy.py @@ -66,7 +66,7 @@ # keep in sync with "version" in C modules _cextversions = { (r'cext', r'base85'): 1, -(r'cext', r'bdiff'): 2, +(r'cext', r'bdiff'): 3, (r'cext', r'diffhelpers'): 1, (r'cext', r'mpatch'): 1, (r'cext', r'osutil'): 3, diff --git a/mercurial/cext/bdiff.c b/mercurial/cext/bdiff.c --- a/mercurial/cext/bdiff.c 
+++ b/mercurial/cext/bdiff.c @@ -17,6 +17,7 @@ #include "bdiff.h" #include "bitmanipulation.h" +#include "thirdparty/xdiff/xdiff.h" #include "util.h" static PyObject *blocks(PyObject *self, PyObject *args) @@ -229,18 +230,78 @@ return NULL; } +static int hunk_consumer(long a1, long a2, long b1, long b2, void *priv) +{ + PyObject *rl = (PyObject *)priv; + PyObject *m = Py_BuildValue("llll", a1, a2, b1, b2); + if (!m) + return -1; + if (PyList_Append(rl, m) != 0) { + Py_DECREF(m); + return -1; + } + return 0; +} + +static PyObject *xdiffblocks(PyObject *self, PyObject *args) +{ + Py_ssize_t la, lb; + mmfile_t a, b; + PyObject *rl; + + xpparam_t xpp = { + XDF_INDENT_HEURISTIC, /* flags */ + NULL, /* anchors */ + 0,/* anchors_nr */ + }; + xdemitconf_t xecfg = { + 0, /* ctxlen */ + 0, /* interhunkctxlen */ + XDL_EMIT_BDIFFHUNK, /* flags */ + NULL, /* find_func */ + NULL, /* find_func_priv */ + hunk_consumer, /* hunk_consume_func */ + }; + xdemitcb_t ecb = { + NULL, /* priv */ + NULL, /* outf */ + }; + + if (!PyArg_ParseTuple(args, PY23("s#s#", "y#y#"), &a.ptr, &la, &b.ptr, + &lb)) + return NULL; + + a.size = la; + b.size = lb; + + rl = PyList_New(0); + if (!rl) + return PyErr_NoMemory(); + + ecb.priv = rl; + + if (xdl_diff(&a, &b, &xpp, &xecfg, &ecb) != 0) { + Py_DECREF(rl); + return PyErr_NoMemory(); + } + + return rl; +} + static char mdiff_doc[] = "Efficient binary diff."; static PyMethodDef methods[] = { {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"}, {"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"}, {"fixws", fixws, METH_VARARGS, "normalize diff whitespaces\n"}, {"splitnewlines", splitnewlines, METH_VARARGS, "like str.splitlines, but only split on newlines\n"}, +{"xdiffblocks", xdiffblocks, METH_VARARGS, + "find a list of matching lines using xdiff algorithm\n"}, {NULL, NULL}, }; -static const int version = 2; +static const int version = 3; #ifdef IS_PY3K static struct PyModuleDef bdiff_module = { To: quark, #hg-reviewers, indygreg Cc: indygreg, mercurial-devel ___ 
Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org
D2602: bdiff: add a xdiffblocks method
quark added inline comments. INLINE COMMENTS > indygreg wrote in bdiff.c:239 > Calling `PyList_Append()` in tight loops can be a bit slow. It is faster to > allocate an array of `PyObject` and then allocate a `PyList` of final size > and call `PyList_SET_ITEM` to populate it. But we can optimize this later: > this is definitely the easiest first implementation. Ideally, xdiff would report how many lines the input string has, so that we could preallocate the list, but that is not available in the current xdiff API. REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D2602 To: quark, #hg-reviewers, indygreg Cc: indygreg, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D2602: bdiff: add a xdiffblocks method
quark added inline comments. INLINE COMMENTS > indygreg wrote in bdiff.c:263 > Does our C standard level allow declaring variables after non-declarations > in blocks? Not sure, but the rest of the file is C89, so I'll move this up. > indygreg wrote in bdiff.c:283 > This exception type is nonsensical. But it is what `blocks()` uses. So not > worth worrying about. It actually makes some sense: if the xdiff library is solid, then `hunk_consumer` returning -1 is the only way things can go wrong, and that is caused by running out of memory. REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D2602 To: quark, #hg-reviewers, indygreg Cc: indygreg, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D2602: bdiff: add a xdiffblocks method
indygreg accepted this revision. indygreg added inline comments. This revision is now accepted and ready to land. INLINE COMMENTS > bdiff.c:239 > + return -1; > + if (PyList_Append(rl, m) != 0) { > + Py_DECREF(m); Calling `PyList_Append()` in tight loops can be a bit slow. It is faster to allocate an array of `PyObject` and then allocate a `PyList` of final size and call `PyList_SET_ITEM` to populate it. But we can optimize this later: this is definitely the easiest first implementation. > bdiff.c:263 > + > + xpparam_t xpp = { > + XDF_INDENT_HEURISTIC, /* flags */ Does our C standard level allow declaring variables after non-declarations in blocks? > bdiff.c:283 > + Py_DECREF(rl); > + return PyErr_NoMemory(); > + } This exception type is nonsensical. But it is what `blocks()` uses. So not worth worrying about. REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D2602 To: quark, #hg-reviewers, indygreg Cc: indygreg, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D2602: bdiff: add a xdiffblocks method
quark updated this revision to Diff 6476. REPOSITORY rHG Mercurial CHANGES SINCE LAST UPDATE https://phab.mercurial-scm.org/D2602?vs=6459=6476 REVISION DETAIL https://phab.mercurial-scm.org/D2602 AFFECTED FILES mercurial/cext/bdiff.c mercurial/policy.py setup.py CHANGE DETAILS diff --git a/setup.py b/setup.py --- a/setup.py +++ b/setup.py @@ -847,14 +847,33 @@ if sys.platform == 'darwin': osutil_ldflags += ['-framework', 'ApplicationServices'] +xdiff_srcs = [ +'mercurial/thirdparty/xdiff/xdiffi.c', +'mercurial/thirdparty/xdiff/xemit.c', +'mercurial/thirdparty/xdiff/xmerge.c', +'mercurial/thirdparty/xdiff/xprepare.c', +'mercurial/thirdparty/xdiff/xutils.c', +] + +xdiff_headers = [ +'mercurial/thirdparty/xdiff/xdiff.h', +'mercurial/thirdparty/xdiff/xdiffi.h', +'mercurial/thirdparty/xdiff/xemit.h', +'mercurial/thirdparty/xdiff/xinclude.h', +'mercurial/thirdparty/xdiff/xmacros.h', +'mercurial/thirdparty/xdiff/xprepare.h', +'mercurial/thirdparty/xdiff/xtypes.h', +'mercurial/thirdparty/xdiff/xutils.h', +] + extmodules = [ Extension('mercurial.cext.base85', ['mercurial/cext/base85.c'], include_dirs=common_include_dirs, depends=common_depends), Extension('mercurial.cext.bdiff', ['mercurial/bdiff.c', - 'mercurial/cext/bdiff.c'], + 'mercurial/cext/bdiff.c'] + xdiff_srcs, include_dirs=common_include_dirs, - depends=common_depends + ['mercurial/bdiff.h']), + depends=common_depends + ['mercurial/bdiff.h'] + xdiff_headers), Extension('mercurial.cext.diffhelpers', ['mercurial/cext/diffhelpers.c'], include_dirs=common_include_dirs, depends=common_depends), diff --git a/mercurial/policy.py b/mercurial/policy.py --- a/mercurial/policy.py +++ b/mercurial/policy.py @@ -66,7 +66,7 @@ # keep in sync with "version" in C modules _cextversions = { (r'cext', r'base85'): 1, -(r'cext', r'bdiff'): 2, +(r'cext', r'bdiff'): 3, (r'cext', r'diffhelpers'): 1, (r'cext', r'mpatch'): 1, (r'cext', r'osutil'): 3, diff --git a/mercurial/cext/bdiff.c b/mercurial/cext/bdiff.c --- a/mercurial/cext/bdiff.c 
+++ b/mercurial/cext/bdiff.c @@ -17,6 +17,7 @@ #include "bdiff.h" #include "bitmanipulation.h" +#include "thirdparty/xdiff/xdiff.h" #include "util.h" static PyObject *blocks(PyObject *self, PyObject *args) @@ -229,18 +230,76 @@ return NULL; } +static int hunk_consumer(long a1, long a2, long b1, long b2, void *priv) +{ + PyObject *rl = (PyObject *)priv; + PyObject *m = Py_BuildValue("llll", a1, a2, b1, b2); + if (!m) + return -1; + if (PyList_Append(rl, m) != 0) { + Py_DECREF(m); + return -1; + } + return 0; +} + +static PyObject *xdiffblocks(PyObject *self, PyObject *args) +{ + Py_ssize_t la, lb; + mmfile_t a, b; + PyObject *rl; + + if (!PyArg_ParseTuple(args, PY23("s#s#", "y#y#"), &a.ptr, &la, &b.ptr, + &lb)) + return NULL; + + a.size = la; + b.size = lb; + + rl = PyList_New(0); + if (!rl) + return PyErr_NoMemory(); + + xpparam_t xpp = { + XDF_INDENT_HEURISTIC, /* flags */ + NULL, /* anchors */ + 0,/* anchors_nr */ + }; + xdemitconf_t xecfg = { + 0, /* ctxlen */ + 0, /* interhunkctxlen */ + XDL_EMIT_BDIFFHUNK, /* flags */ + NULL, /* find_func */ + NULL, /* find_func_priv */ + hunk_consumer, /* hunk_consume_func */ + }; + xdemitcb_t ecb = { + rl, /* priv */ + NULL, /* outf */ + }; + + if (xdl_diff(&a, &b, &xpp, &xecfg, &ecb) != 0) { + Py_DECREF(rl); + return PyErr_NoMemory(); + } + + return rl; +} + static char mdiff_doc[] = "Efficient binary diff."; static PyMethodDef methods[] = { {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"}, {"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"}, {"fixws", fixws, METH_VARARGS, "normalize diff whitespaces\n"}, {"splitnewlines", splitnewlines, METH_VARARGS, "like str.splitlines, but only split on newlines\n"}, +{"xdiffblocks", xdiffblocks, METH_VARARGS, + "find a list of matching lines using xdiff algorithm\n"}, {NULL, NULL}, }; -static const int version = 2; +static const int version = 3; #ifdef IS_PY3K static struct PyModuleDef bdiff_module = { To: quark, #hg-reviewers Cc: mercurial-devel ___ Mercurial-devel mailing list 
Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D2602: bdiff: add a xdiffblocks method
quark created this revision. Herald added a subscriber: mercurial-devel. Herald added a reviewer: hg-reviewers. REVISION SUMMARY This is similar to `bdiff.blocks`, but uses xdiff as the backend. The indent heuristic is turned on by default since it has little overhead and improves diff quality significantly. REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D2602 AFFECTED FILES mercurial/cext/bdiff.c mercurial/policy.py setup.py CHANGE DETAILS diff --git a/setup.py b/setup.py --- a/setup.py +++ b/setup.py @@ -847,14 +847,33 @@ if sys.platform == 'darwin': osutil_ldflags += ['-framework', 'ApplicationServices'] +xdiff_srcs = [ +'mercurial/third-party/xdiff/xdiffi.c', +'mercurial/third-party/xdiff/xemit.c', +'mercurial/third-party/xdiff/xmerge.c', +'mercurial/third-party/xdiff/xprepare.c', +'mercurial/third-party/xdiff/xutils.c', +] + +xdiff_headers = [ +'mercurial/third-party/xdiff/xdiff.h', +'mercurial/third-party/xdiff/xdiffi.h', +'mercurial/third-party/xdiff/xemit.h', +'mercurial/third-party/xdiff/xinclude.h', +'mercurial/third-party/xdiff/xmacros.h', +'mercurial/third-party/xdiff/xprepare.h', +'mercurial/third-party/xdiff/xtypes.h', +'mercurial/third-party/xdiff/xutils.h', +] + extmodules = [ Extension('mercurial.cext.base85', ['mercurial/cext/base85.c'], include_dirs=common_include_dirs, depends=common_depends), Extension('mercurial.cext.bdiff', ['mercurial/bdiff.c', - 'mercurial/cext/bdiff.c'], + 'mercurial/cext/bdiff.c'] + xdiff_srcs, include_dirs=common_include_dirs, - depends=common_depends + ['mercurial/bdiff.h']), + depends=common_depends + ['mercurial/bdiff.h'] + xdiff_headers), Extension('mercurial.cext.diffhelpers', ['mercurial/cext/diffhelpers.c'], include_dirs=common_include_dirs, depends=common_depends), diff --git a/mercurial/policy.py b/mercurial/policy.py --- a/mercurial/policy.py +++ b/mercurial/policy.py @@ -66,7 +66,7 @@ # keep in sync with "version" in C modules _cextversions = { (r'cext', r'base85'): 1, -(r'cext', 
r'bdiff'): 2, +(r'cext', r'bdiff'): 3, (r'cext', r'diffhelpers'): 1, (r'cext', r'mpatch'): 1, (r'cext', r'osutil'): 3, diff --git a/mercurial/cext/bdiff.c b/mercurial/cext/bdiff.c --- a/mercurial/cext/bdiff.c +++ b/mercurial/cext/bdiff.c @@ -17,6 +17,7 @@ #include "bdiff.h" #include "bitmanipulation.h" +#include "third-party/xdiff/xdiff.h" #include "util.h" static PyObject *blocks(PyObject *self, PyObject *args) @@ -229,18 +230,76 @@ return NULL; } +static int hunk_consumer(long a1, long a2, long b1, long b2, void *priv) +{ + PyObject *rl = (PyObject *)priv; + PyObject *m = Py_BuildValue("llll", a1, a2, b1, b2); + if (!m) + return -1; + if (PyList_Append(rl, m) != 0) { + Py_DECREF(m); + return -1; + } + return 0; +} + +static PyObject *xdiffblocks(PyObject *self, PyObject *args) +{ + Py_ssize_t la, lb; + mmfile_t a, b; + PyObject *rl; + + if (!PyArg_ParseTuple(args, PY23("s#s#", "y#y#"), &a.ptr, &la, &b.ptr, + &lb)) + return NULL; + + a.size = la; + b.size = lb; + + rl = PyList_New(0); + if (!rl) + return PyErr_NoMemory(); + + xpparam_t xpp = { + XDF_INDENT_HEURISTIC, /* flags */ + NULL, /* anchors */ + 0,/* anchors_nr */ + }; + xdemitconf_t xecfg = { + 0, /* ctxlen */ + 0, /* interhunkctxlen */ + XDL_EMIT_BDIFFHUNK, /* flags */ + NULL, /* find_func */ + NULL, /* find_func_priv */ + hunk_consumer, /* hunk_consume_func */ + }; + xdemitcb_t ecb = { + rl, /* priv */ + NULL, /* outf */ + }; + + if (xdl_diff(&a, &b, &xpp, &xecfg, &ecb) != 0) { + Py_DECREF(rl); + return PyErr_NoMemory(); + } + + return rl; +} + static char mdiff_doc[] = "Efficient binary diff."; static PyMethodDef methods[] = { {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"}, {"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"}, {"fixws", fixws, METH_VARARGS, "normalize diff whitespaces\n"}, {"splitnewlines", splitnewlines, METH_VARARGS, "like str.splitlines, but only split on newlines\n"}, +{"xdiffblocks", xdiffblocks, METH_VARARGS, + "find a list of matching lines using xdiff algorithm\n"}, {NULL, NULL}, }; -static 
const int version = 2; +static const int version = 3; #ifdef IS_PY3K static struct PyModuleDef bdiff_module = { To: quark, #hg-reviewers Cc: