[PATCH 2 of 2] bdiff: early pruning of common suffix before doing expensive computations
# HG changeset patch # User Mads Kiilerich # Date 1479322051 -3600 # Wed Nov 16 19:47:31 2016 +0100 # Node ID 851a14d5b80639efe61bd01ad215479c99106b40 # Parent 7f39bccc1c96bffc83f3c6e774da57ecd38982a7 bdiff: early pruning of common suffix before doing expensive computations Similar to the previous change, but trimming trailing lines. On mozilla-unified: perfbdiff -m 3041e4d59df2 ! wall 0.024618 comb 0.02 user 0.02 sys 0.00 (best of 116) to ! wall 0.008259 comb 0.01 user 0.01 sys 0.00 (best of 356) to perfbdiff 0e9928989e9c --alldata --count 10 ! wall 0.579235 comb 0.58 user 0.58 sys 0.00 (best of 18) ! wall 0.469869 comb 0.46 user 0.46 sys 0.00 (best of 22) diff --git a/mercurial/bdiff_module.c b/mercurial/bdiff_module.c --- a/mercurial/bdiff_module.c +++ b/mercurial/bdiff_module.c @@ -66,7 +66,7 @@ static PyObject *bdiff(PyObject *self, P struct bdiff_line *al, *bl; struct bdiff_hunk l, *h; int an, bn, count; - Py_ssize_t len = 0, la, lb, li = 0, lcommon = 0, lmax; + Py_ssize_t len = 0, la, lb, li = 0, lcommon = 0, lcommonend = 0, lmax; PyThreadState *_save; l.next = NULL; @@ -88,9 +88,16 @@ static PyObject *bdiff(PyObject *self, P if (*ia == '\n') lcommon = li + 1; /* we can almost add: if (li == lmax) lcommon = li; */ + lmax -= lcommon; + for (li = 0, ia = sa + la - 1, ib = sb + lb - 1; +li <= lmax && *ia == *ib; +++li, --ia, --ib) + if (*ia == '\n') + lcommonend = li; + /* printf("common %i %i\n", lcommon, lcommonend); */ - an = bdiff_splitlines(sa + lcommon, la - lcommon, &al); - bn = bdiff_splitlines(sb + lcommon, lb - lcommon, &bl); + an = bdiff_splitlines(sa + lcommon, la - lcommon - lcommonend, &al); + bn = bdiff_splitlines(sb + lcommon, lb - lcommon - lcommonend, &bl); if (!al || !bl) goto nomem; diff --git a/tests/test-bdiff.py.out b/tests/test-bdiff.py.out --- a/tests/test-bdiff.py.out +++ b/tests/test-bdiff.py.out @@ -28,9 +28,9 @@ showdiff( 'x\n\nx\n\ny\n\nx\n\ny\n\nx\n\nz\n'): 'x\n\nx\n\n' 6 6 '' -> 'y\n\n' - 'x\n\n' - 9 9 '' -> 'y\n\n' - 'x\n\nz\n' + 'x\n' + 8 8 '' -> '\ny\n' + '\nx\n\nz\n' showdiff( 'a\nb\nb\nb\nc\n.\nd\ne\n.\nf\n', 'a\nb\nb\na\nb\nb\nb\nc\n.\nb\nc\n.\nd\ne\nf\n'): diff --git a/tests/test-check-code.t b/tests/test-check-code.t --- a/tests/test-check-code.t +++ b/tests/test-check-code.t @@ -15,10 +15,6 @@ New errors are not allowed. Warnings are Skipping hgext/fsmonitor/pywatchman/msc_stdint.h it has no-che?k-code (glob) Skipping hgext/fsmonitor/pywatchman/pybser.py it has no-che?k-code (glob) Skipping i18n/polib.py it has no-che?k-code (glob) - mercurial/bdiff_module.c:89: - > //printf("common prefix: %i next line: %i\n", li, llf); - don't use //-style comments Skipping mercurial/httpclient/__init__.py it has no-che?k-code (glob) Skipping mercurial/httpclient/_readers.py it has no-che?k-code (glob) Skipping mercurial/statprof.py it has no-che?k-code (glob) - [1] ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Re: [PATCH 2 of 2] bdiff: early pruning of common suffix before doing expensive computations
On 11/17/2016 06:16 PM, Mads Kiilerich wrote: # HG changeset patch # User Mads Kiilerich # Date 1479322051 -3600 # Wed Nov 16 19:47:31 2016 +0100 # Node ID 851a14d5b80639efe61bd01ad215479c99106b40 # Parent 7f39bccc1c96bffc83f3c6e774da57ecd38982a7 bdiff: early pruning of common suffix before doing expensive computations Similar to the previous change, but trimming trailing lines. The change itself seems fine and the perf win is very tasty. However, this changesets breaks tests with --pure. I'm tempted to drop it unless we can provide a test update to fold into this. Test failure with --pure: --- /home/marmoute/src/mercurial-dev/tests/test-bdiff.py.out +++ /home/marmoute/src/mercurial-dev/tests/test-bdiff.py.err @@ -28,15 +28,16 @@ 'x\n\nx\n\ny\n\nx\n\ny\n\nx\n\nz\n'): 'x\n\nx\n\n' 6 6 '' -> 'y\n\n' - 'x\n' - 8 8 '' -> '\ny\n' - '\nx\n\nz\n' + 'x\n\n' + 9 9 '' -> 'y\n\n' + 'x\n\nz\n' showdiff( 'a\nb\nb\nb\nc\n.\nd\ne\n.\nf\n', 'a\nb\nb\na\nb\nb\nb\nc\n.\nb\nc\n.\nd\ne\nf\n'): - 'a\nb\nb\n' - 6 6 '' -> 'a\nb\nb\nb\nc\n.\n' - 'b\nc\n.\nd\ne\n' + 0 0 '' -> 'a\nb\nb\n' + 'a\nb\nb\nb\nc\n.\n' + 12 12 '' -> 'b\nc\n.\n' + 'd\ne\n' 16 18 '.\n' -> '' 'f\n' done On mozilla-unified: perfbdiff -m 3041e4d59df2 ! wall 0.024618 comb 0.02 user 0.02 sys 0.00 (best of 116) to ! wall 0.008259 comb 0.01 user 0.01 sys 0.00 (best of 356) to perfbdiff 0e9928989e9c --alldata --count 10 ! wall 0.579235 comb 0.58 user 0.58 sys 0.00 (best of 18) ! wall 0.469869 comb 0.46 user 0.46 sys 0.00 (best of 22) diff --git a/mercurial/bdiff_module.c b/mercurial/bdiff_module.c --- a/mercurial/bdiff_module.c +++ b/mercurial/bdiff_module.c @@ -66,7 +66,7 @@ static PyObject *bdiff(PyObject *self, P struct bdiff_line *al, *bl; struct bdiff_hunk l, *h; int an, bn, count; - Py_ssize_t len = 0, la, lb, li = 0, lcommon = 0, lmax; + Py_ssize_t len = 0, la, lb, li = 0, lcommon = 0, lcommonend = 0, lmax; PyThreadState *_save; l.next = NULL; @@ -88,9 +88,16 @@ static PyObject *bdiff(PyObject *self, P if (*ia == '\n') lcommon = li + 1; /* we can almost add: if (li == lmax) lcommon = li; */ + lmax -= lcommon; + for (li = 0, ia = sa + la - 1, ib = sb + lb - 1; +li <= lmax && *ia == *ib; +++li, --ia, --ib) + if (*ia == '\n') + lcommonend = li; + /* printf("common %i %i\n", lcommon, lcommonend); */ - an = bdiff_splitlines(sa + lcommon, la - lcommon, &al); - bn = bdiff_splitlines(sb + lcommon, lb - lcommon, &bl); + an = bdiff_splitlines(sa + lcommon, la - lcommon - lcommonend, &al); + bn = bdiff_splitlines(sb + lcommon, lb - lcommon - lcommonend, &bl); if (!al || !bl) goto nomem; diff --git a/tests/test-bdiff.py.out b/tests/test-bdiff.py.out --- a/tests/test-bdiff.py.out +++ b/tests/test-bdiff.py.out @@ -28,9 +28,9 @@ showdiff( 'x\n\nx\n\ny\n\nx\n\ny\n\nx\n\nz\n'): 'x\n\nx\n\n' 6 6 '' -> 'y\n\n' - 'x\n\n' - 9 9 '' -> 'y\n\n' - 'x\n\nz\n' + 'x\n' + 8 8 '' -> '\ny\n' + '\nx\n\nz\n' showdiff( 'a\nb\nb\nb\nc\n.\nd\ne\n.\nf\n', 'a\nb\nb\na\nb\nb\nb\nc\n.\nb\nc\n.\nd\ne\nf\n'): diff --git a/tests/test-check-code.t b/tests/test-check-code.t --- a/tests/test-check-code.t +++ b/tests/test-check-code.t @@ -15,10 +15,6 @@ New errors are not allowed. Warnings are Skipping hgext/fsmonitor/pywatchman/msc_stdint.h it has no-che?k-code (glob) Skipping hgext/fsmonitor/pywatchman/pybser.py it has no-che?k-code (glob) Skipping i18n/polib.py it has no-che?k-code (glob) - mercurial/bdiff_module.c:89: - >//printf("common prefix: %i next line: %i\n", li, llf); - don't use //-style comments Skipping mercurial/httpclient/__init__.py it has no-che?k-code (glob) Skipping mercurial/httpclient/_readers.py it has no-che?k-code (glob) Skipping mercurial/statprof.py it has no-che?k-code (glob) - [1] ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel -- Pierre-Yves David ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Re: [PATCH 2 of 2] bdiff: early pruning of common suffix before doing expensive computations
On Thu, 15 Dec 2016 09:01:11 +0100, Pierre-Yves David wrote: > On 11/17/2016 06:16 PM, Mads Kiilerich wrote: > > # HG changeset patch > > # User Mads Kiilerich > > # Date 1479322051 -3600 > > # Wed Nov 16 19:47:31 2016 +0100 > > # Node ID 851a14d5b80639efe61bd01ad215479c99106b40 > > # Parent 7f39bccc1c96bffc83f3c6e774da57ecd38982a7 > > bdiff: early pruning of common suffix before doing expensive computations > > > > Similar to the previous change, but trimming trailing lines. > > The change itself seems fine and the perf win is very tasty. However, > this changesets breaks tests with --pure. > > I'm tempted to drop it unless we can provide a test update to fold into > this. [...] > > --- a/mercurial/bdiff_module.c > > +++ b/mercurial/bdiff_module.c > > @@ -66,7 +66,7 @@ static PyObject *bdiff(PyObject *self, P > > struct bdiff_line *al, *bl; > > struct bdiff_hunk l, *h; > > int an, bn, count; > > - Py_ssize_t len = 0, la, lb, li = 0, lcommon = 0, lmax; > > + Py_ssize_t len = 0, la, lb, li = 0, lcommon = 0, lcommonend = 0, lmax; > > PyThreadState *_save; > > > > l.next = NULL; > > @@ -88,9 +88,16 @@ static PyObject *bdiff(PyObject *self, P > > if (*ia == '\n') > > lcommon = li + 1; > > /* we can almost add: if (li == lmax) lcommon = li; */ > > + lmax -= lcommon; > > + for (li = 0, ia = sa + la - 1, ib = sb + lb - 1; > > +li <= lmax && *ia == *ib; > > +++li, --ia, --ib) > > + if (*ia == '\n') > > + lcommonend = li; And there appears to be an off-by-one error. 'li' is a zero-based index and 'lmax' is a length. +1 for dropping this for now. ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Re: [PATCH 2 of 2] bdiff: early pruning of common suffix before doing expensive computations
> On Dec 15, 2016, at 8:02 AM, Yuya Nishihara wrote: > > On Thu, 15 Dec 2016 09:01:11 +0100, Pierre-Yves David wrote: >> On 11/17/2016 06:16 PM, Mads Kiilerich wrote: >>> # HG changeset patch >>> # User Mads Kiilerich >>> # Date 1479322051 -3600 >>> # Wed Nov 16 19:47:31 2016 +0100 >>> # Node ID 851a14d5b80639efe61bd01ad215479c99106b40 >>> # Parent 7f39bccc1c96bffc83f3c6e774da57ecd38982a7 >>> bdiff: early pruning of common suffix before doing expensive computations >>> >>> Similar to the previous change, but trimming trailing lines. >> >> The change itself seems fine and the perf win is very tasty. However, >> this changesets breaks tests with --pure. >> >> I'm tempted to drop it unless we can provide a test update to fold into >> this. > > […] I’d be fine to just #if pure the variations between pure and C, given the immense win this provides. > >>> --- a/mercurial/bdiff_module.c >>> +++ b/mercurial/bdiff_module.c >>> @@ -66,7 +66,7 @@ static PyObject *bdiff(PyObject *self, P >>> struct bdiff_line *al, *bl; >>> struct bdiff_hunk l, *h; >>> int an, bn, count; >>> - Py_ssize_t len = 0, la, lb, li = 0, lcommon = 0, lmax; >>> + Py_ssize_t len = 0, la, lb, li = 0, lcommon = 0, lcommonend = 0, lmax; >>> PyThreadState *_save; >>> >>> l.next = NULL; >>> @@ -88,9 +88,16 @@ static PyObject *bdiff(PyObject *self, P >>> if (*ia == '\n') >>> lcommon = li + 1; >>> /* we can almost add: if (li == lmax) lcommon = li; */ >>> + lmax -= lcommon; >>> + for (li = 0, ia = sa + la - 1, ib = sb + lb - 1; >>> +li <= lmax && *ia == *ib; >>> +++li, --ia, --ib) >>> + if (*ia == '\n') >>> + lcommonend = li; > > And there appears to be an off-by-one error. 'li' is a zero-based index and > 'lmax' is a length. > > +1 for dropping this for now. Given the off-by-one, yeah, drop it. I’m a little surprised the bdiff automated stress tester didn’t trip over that. Sigh. ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Re: [PATCH 2 of 2] bdiff: early pruning of common suffix before doing expensive computations
On 12/15/2016 02:09 PM, Augie Fackler wrote: On Dec 15, 2016, at 8:02 AM, Yuya Nishihara wrote: On Thu, 15 Dec 2016 09:01:11 +0100, Pierre-Yves David wrote: On 11/17/2016 06:16 PM, Mads Kiilerich wrote: # HG changeset patch # User Mads Kiilerich # Date 1479322051 -3600 # Wed Nov 16 19:47:31 2016 +0100 # Node ID 851a14d5b80639efe61bd01ad215479c99106b40 # Parent 7f39bccc1c96bffc83f3c6e774da57ecd38982a7 bdiff: early pruning of common suffix before doing expensive computations Similar to the previous change, but trimming trailing lines. The change itself seems fine and the perf win is very tasty. However, this changesets breaks tests with --pure. I'm tempted to drop it unless we can provide a test update to fold into this. […] I’d be fine to just #if pure the variations between pure and C, given the immense win this provides. The previous changesets (already public) is also breaking pure. Can one of you submit the necessary test update? --- a/mercurial/bdiff_module.c +++ b/mercurial/bdiff_module.c @@ -66,7 +66,7 @@ static PyObject *bdiff(PyObject *self, P struct bdiff_line *al, *bl; struct bdiff_hunk l, *h; int an, bn, count; - Py_ssize_t len = 0, la, lb, li = 0, lcommon = 0, lmax; + Py_ssize_t len = 0, la, lb, li = 0, lcommon = 0, lcommonend = 0, lmax; PyThreadState *_save; l.next = NULL; @@ -88,9 +88,16 @@ static PyObject *bdiff(PyObject *self, P if (*ia == '\n') lcommon = li + 1; /* we can almost add: if (li == lmax) lcommon = li; */ + lmax -= lcommon; + for (li = 0, ia = sa + la - 1, ib = sb + lb - 1; +li <= lmax && *ia == *ib; +++li, --ia, --ib) + if (*ia == '\n') + lcommonend = li; And there appears to be an off-by-one error. 'li' is a zero-based index and 'lmax' is a length. +1 for dropping this for now. Given the off-by-one, yeah, drop it. I’m a little surprised the bdiff automated stress tester didn’t trip over that. -- Pierre-Yves David ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Re: [PATCH 2 of 2] bdiff: early pruning of common suffix before doing expensive computations
On Thu, Dec 15, 2016 at 8:55 AM, Pierre-Yves David wrote: > The previous changesets (already public) is also breaking pure. Can one of > you submit the necessary test update? Yeah, I'll draft it and push it within the next hour. ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Re: [PATCH 2 of 2] bdiff: early pruning of common suffix before doing expensive computations
On 12/15/2016 02:59 PM, Augie Fackler wrote: On Thu, Dec 15, 2016 at 8:55 AM, Pierre-Yves David wrote: The previous changesets (already public) is also breaking pure. Can one of you submit the necessary test update? Yeah, I'll draft it and push it within the next hour. Thanks! -- Pierre-Yves David ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Re: [PATCH 2 of 2] bdiff: early pruning of common suffix before doing expensive computations
On Thu, Nov 17, 2016 at 9:16 AM, Mads Kiilerich wrote: > # HG changeset patch > # User Mads Kiilerich > # Date 1479322051 -3600 > # Wed Nov 16 19:47:31 2016 +0100 > # Node ID 851a14d5b80639efe61bd01ad215479c99106b40 > # Parent 7f39bccc1c96bffc83f3c6e774da57ecd38982a7 > bdiff: early pruning of common suffix before doing expensive computations > > Similar to the previous change, but trimming trailing lines. > > On mozilla-unified: > perfbdiff -m 3041e4d59df2 > ! wall 0.024618 comb 0.02 user 0.02 sys 0.00 (best of 116) to > ! wall 0.008259 comb 0.01 user 0.01 sys 0.00 (best of 356) to > perfbdiff 0e9928989e9c --alldata --count 10 > ! wall 0.579235 comb 0.58 user 0.58 sys 0.00 (best of 18) > ! wall 0.469869 comb 0.46 user 0.46 sys 0.00 (best of 22) > > I can collaborate the perf improvements. On mozilla-unified (using a different clone and machine from what I wrote in my commit messages reporting bdiff perf - but the CPU model is the same), these 2 patches result in: $ perfbdiff -m 3041e4d59df2 ! wall 0.040755 comb 0.04 user 0.04 sys 0.00 (best of 100) ! wall 0.007953 comb 0.01 user 0.01 sys 0.00 (best of 304) $ perfbdiff 0e9928989e9c --alldata --count 100 ! wall 4.59 comb 4.58 user 4.58 sys 0.00 (best of 3) ! wall 1.748509 comb 1.75 user 1.75 sys 0.00 (best of 6) Compared to 4.0: $ perfbdiff -m 3041e4d59df2 ! wall 0.076924 comb 0.08 user 0.08 sys 0.00 (best of 100) ! wall 0.007953 comb 0.01 user 0.01 sys 0.00 (best of 304) $ perfbdiff 0e9928989e9c --alldata --count 100 ! wall 8.892216 comb 8.88 user 8.88 sys 0.00 (best of 3) ! wall 1.748509 comb 1.75 user 1.75 sys 0.00 (best of 6) Looking at the assembly and profiling output, I believe I've confirmed my suspicions from my comment on part 1: there is still room to optimize this code. Again, that can be deferred to a follow-up. > diff --git a/mercurial/bdiff_module.c b/mercurial/bdiff_module.c > --- a/mercurial/bdiff_module.c > +++ b/mercurial/bdiff_module.c > @@ -66,7 +66,7 @@ static PyObject *bdiff(PyObject *self, P > struct bdiff_line *al, *bl; > struct bdiff_hunk l, *h; > int an, bn, count; > - Py_ssize_t len = 0, la, lb, li = 0, lcommon = 0, lmax; > + Py_ssize_t len = 0, la, lb, li = 0, lcommon = 0, lcommonend = 0, > lmax; > PyThreadState *_save; > > l.next = NULL; > @@ -88,9 +88,16 @@ static PyObject *bdiff(PyObject *self, P > if (*ia == '\n') > lcommon = li + 1; > /* we can almost add: if (li == lmax) lcommon = li; */ > + lmax -= lcommon; > + for (li = 0, ia = sa + la - 1, ib = sb + lb - 1; > +li <= lmax && *ia == *ib; > +++li, --ia, --ib) > + if (*ia == '\n') > + lcommonend = li; > + /* printf("common %i %i\n", lcommon, lcommonend); */ > > - an = bdiff_splitlines(sa + lcommon, la - lcommon, &al); > - bn = bdiff_splitlines(sb + lcommon, lb - lcommon, &bl); > + an = bdiff_splitlines(sa + lcommon, la - lcommon - lcommonend, > &al); > + bn = bdiff_splitlines(sb + lcommon, lb - lcommon - lcommonend, > &bl); > if (!al || !bl) > goto nomem; > > diff --git a/tests/test-bdiff.py.out b/tests/test-bdiff.py.out > --- a/tests/test-bdiff.py.out > +++ b/tests/test-bdiff.py.out > @@ -28,9 +28,9 @@ showdiff( >'x\n\nx\n\ny\n\nx\n\ny\n\nx\n\nz\n'): > 'x\n\nx\n\n' > 6 6 '' -> 'y\n\n' > - 'x\n\n' > - 9 9 '' -> 'y\n\n' > - 'x\n\nz\n' > + 'x\n' > + 8 8 '' -> '\ny\n' > + '\nx\n\nz\n' > showdiff( >'a\nb\nb\nb\nc\n.\nd\ne\n.\nf\n', >'a\nb\nb\na\nb\nb\nb\nc\n.\nb\nc\n.\nd\ne\nf\n'): > diff --git a/tests/test-check-code.t b/tests/test-check-code.t > --- a/tests/test-check-code.t > +++ b/tests/test-check-code.t > @@ -15,10 +15,6 @@ New errors are not allowed. Warnings are >Skipping hgext/fsmonitor/pywatchman/msc_stdint.h it has no-che?k-code > (glob) >Skipping hgext/fsmonitor/pywatchman/pybser.py it has no-che?k-code > (glob) >Skipping i18n/polib.py it has no-che?k-code (glob) > - mercurial/bdiff_module.c:89: > - > //printf("common prefix: %i next line: %i\n", li, llf); > - don't use //-style comments >Skipping mercurial/httpclient/__init__.py it has no-che?k-code (glob) >Skipping mercurial/httpclient/_readers.py it has no-che?k-code (glob) >Skipping mercurial/statprof.py it has no-che?k-code (glob) > - [1] > ___ > Mercurial-devel mailing list > Mercurial-devel@mercurial-scm.org > https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel > ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Re: [PATCH 2 of 2] bdiff: early pruning of common suffix before doing expensive computations
On Thu, Nov 17, 2016 at 11:26 AM, Gregory Szorc wrote: > On Thu, Nov 17, 2016 at 9:16 AM, Mads Kiilerich > wrote: > >> # HG changeset patch >> # User Mads Kiilerich >> # Date 1479322051 -3600 >> # Wed Nov 16 19:47:31 2016 +0100 >> # Node ID 851a14d5b80639efe61bd01ad215479c99106b40 >> # Parent 7f39bccc1c96bffc83f3c6e774da57ecd38982a7 >> bdiff: early pruning of common suffix before doing expensive computations >> >> Similar to the previous change, but trimming trailing lines. >> >> On mozilla-unified: >> perfbdiff -m 3041e4d59df2 >> ! wall 0.024618 comb 0.02 user 0.02 sys 0.00 (best of 116) to >> ! wall 0.008259 comb 0.01 user 0.01 sys 0.00 (best of 356) to >> perfbdiff 0e9928989e9c --alldata --count 10 >> ! wall 0.579235 comb 0.58 user 0.58 sys 0.00 (best of 18) >> ! wall 0.469869 comb 0.46 user 0.46 sys 0.00 (best of 22) >> >> > I can collaborate the perf improvements. On mozilla-unified (using a > different clone and machine from what I wrote in my commit messages > reporting bdiff perf - but the CPU model is the same), these 2 patches > result in: > > $ perfbdiff -m 3041e4d59df2 > ! wall 0.040755 comb 0.04 user 0.04 sys 0.00 (best of 100) > ! wall 0.007953 comb 0.01 user 0.01 sys 0.00 (best of 304) > > $ perfbdiff 0e9928989e9c --alldata --count 100 > ! wall 4.59 comb 4.58 user 4.58 sys 0.00 (best of 3) > ! wall 1.748509 comb 1.75 user 1.75 sys 0.00 (best of 6) > > Compared to 4.0: > $ perfbdiff -m 3041e4d59df2 > ! wall 0.076924 comb 0.08 user 0.08 sys 0.00 (best of 100) > ! wall 0.007953 comb 0.01 user 0.01 sys 0.00 (best of 304) > > $ perfbdiff 0e9928989e9c --alldata --count 100 > ! wall 8.892216 comb 8.88 user 8.88 sys 0.00 (best of 3) > ! wall 1.748509 comb 1.75 user 1.75 sys 0.00 (best of 6) > > Looking at the assembly and profiling output, I believe I've confirmed my > suspicions from my comment on part 1: there is still room to optimize this > code. Again, that can be deferred to a follow-up. > It's worth looking at where we're spending CPU time after this series. Measuring `perfbdiff 0e9928989e9c --alldata --count 100`: This series 40% bdiff_module.c:bdiff() 38% bidff.c:bdiff_splitlines() 12% bdiff.c:bdiff_diff() 5% __memcmp_sse4_1 (not sure where exactly this is called from) 4% bdiff.c:recurse() Before prefix and suffix matching: 62% bdiff.c:bdiff_splitlines() 23% bdiff.c.bdiff_diff() 8% memcmp_sse4_1 5% bdiff.c:recurse() 4.0 82% bdiff.c:bdiff_splitlines() 10% bdiff.c:bdiff_diff() 4.5% __memcmp_sse4_1 2.5% bdiff.c:recurse() This series shifted time out of bdiff_splitlines() *and* bdiff_diff(). Furthermore, from 4.0 to this series, the ratio of time spent before bdiff_diff() and inside is roughly the same, despite things getting 5-10x faster in that time! It is alarming we're still spending so much time in what is effectively set-up to run an algorithm. But this is also a good thing: we know the setup code can still be optimized which means there are still significant perf wins on the table. > >> diff --git a/mercurial/bdiff_module.c b/mercurial/bdiff_module.c >> --- a/mercurial/bdiff_module.c >> +++ b/mercurial/bdiff_module.c >> @@ -66,7 +66,7 @@ static PyObject *bdiff(PyObject *self, P >> struct bdiff_line *al, *bl; >> struct bdiff_hunk l, *h; >> int an, bn, count; >> - Py_ssize_t len = 0, la, lb, li = 0, lcommon = 0, lmax; >> + Py_ssize_t len = 0, la, lb, li = 0, lcommon = 0, lcommonend = 0, >> lmax; >> PyThreadState *_save; >> >> l.next = NULL; >> @@ -88,9 +88,16 @@ static PyObject *bdiff(PyObject *self, P >> if (*ia == '\n') >> lcommon = li + 1; >> /* we can almost add: if (li == lmax) lcommon = li; */ >> + lmax -= lcommon; >> + for (li = 0, ia = sa + la - 1, ib = sb + lb - 1; >> +li <= lmax && *ia == *ib; >> +++li, --ia, --ib) >> + if (*ia == '\n') >> + lcommonend = li; >> + /* printf("common %i %i\n", lcommon, lcommonend); */ >> >> - an = bdiff_splitlines(sa + lcommon, la - lcommon, &al); >> - bn = bdiff_splitlines(sb + lcommon, lb - lcommon, &bl); >> + an = bdiff_splitlines(sa + lcommon, la - lcommon - lcommonend, >> &al); >> + bn = bdiff_splitlines(sb + lcommon, lb - lcommon - lcommonend, >> &bl); >> if (!al || !bl) >> goto nomem; >> >> diff --git a/tests/test-bdiff.py.out b/tests/test-bdiff.py.out >> --- a/tests/test-bdiff.py.out >> +++ b/tests/test-bdiff.py.out >> @@ -28,9 +28,9 @@ showdiff( >>'x\n\nx\n\ny\n\nx\n\ny\n\nx\n\nz\n'): >> 'x\n\nx\n\n' >> 6 6 '' -> 'y\n\n' >> - 'x\n\n' >> - 9 9 '' -> 'y\n\n' >> - 'x\n\nz\n' >> + 'x\n' >> + 8 8 '' -> '\ny\n' >> + '\nx\n\nz\n' >> showdiff( >>'a\nb\nb\nb\nc\n.\nd\ne\n.\nf\n', >>'a\nb\nb\na\nb\nb\nb\nc\n.\nb\nc\n.\nd\ne\nf\n'): >> diff --
Re: [PATCH 2 of 2] bdiff: early pruning of common suffix before doing expensive computations
On Thu, 17 Nov 2016 18:16:40 +0100, Mads Kiilerich wrote: > # HG changeset patch > # User Mads Kiilerich > # Date 1479322051 -3600 > # Wed Nov 16 19:47:31 2016 +0100 > # Node ID 851a14d5b80639efe61bd01ad215479c99106b40 > # Parent 7f39bccc1c96bffc83f3c6e774da57ecd38982a7 > bdiff: early pruning of common suffix before doing expensive computations > > Similar to the previous change, but trimming trailing lines. > > On mozilla-unified: > perfbdiff -m 3041e4d59df2 > ! wall 0.024618 comb 0.02 user 0.02 sys 0.00 (best of 116) to > ! wall 0.008259 comb 0.01 user 0.01 sys 0.00 (best of 356) to > perfbdiff 0e9928989e9c --alldata --count 10 > ! wall 0.579235 comb 0.58 user 0.58 sys 0.00 (best of 18) > ! wall 0.469869 comb 0.46 user 0.46 sys 0.00 (best of 22) > > diff --git a/mercurial/bdiff_module.c b/mercurial/bdiff_module.c > --- a/mercurial/bdiff_module.c > +++ b/mercurial/bdiff_module.c > @@ -66,7 +66,7 @@ static PyObject *bdiff(PyObject *self, P > struct bdiff_line *al, *bl; > struct bdiff_hunk l, *h; > int an, bn, count; > - Py_ssize_t len = 0, la, lb, li = 0, lcommon = 0, lmax; > + Py_ssize_t len = 0, la, lb, li = 0, lcommon = 0, lcommonend = 0, lmax; > PyThreadState *_save; > > l.next = NULL; > @@ -88,9 +88,16 @@ static PyObject *bdiff(PyObject *self, P > if (*ia == '\n') > lcommon = li + 1; > /* we can almost add: if (li == lmax) lcommon = li; */ > + lmax -= lcommon; > + for (li = 0, ia = sa + la - 1, ib = sb + lb - 1; > + li <= lmax && *ia == *ib; > + ++li, --ia, --ib) > + if (*ia == '\n') > + lcommonend = li; Perhaps this could read one byte before the sa and sb. For bdiff('', ''), li == lmax == 0 but ia == sa[-1]. ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel