[PATCH 2 of 2] bdiff: early pruning of common suffix before doing expensive computations

2016-11-17 Thread Mads Kiilerich
# HG changeset patch
# User Mads Kiilerich 
# Date 1479322051 -3600
#  Wed Nov 16 19:47:31 2016 +0100
# Node ID 851a14d5b80639efe61bd01ad215479c99106b40
# Parent  7f39bccc1c96bffc83f3c6e774da57ecd38982a7
bdiff: early pruning of common suffix before doing expensive computations

Similar to the previous change, but trimming trailing lines.

On mozilla-unified:
perfbdiff -m 3041e4d59df2
! wall 0.024618 comb 0.02 user 0.02 sys 0.00 (best of 116) to
! wall 0.008259 comb 0.01 user 0.01 sys 0.00 (best of 356) to
perfbdiff 0e9928989e9c --alldata --count 10
! wall 0.579235 comb 0.58 user 0.58 sys 0.00 (best of 18)
! wall 0.469869 comb 0.46 user 0.46 sys 0.00 (best of 22)

diff --git a/mercurial/bdiff_module.c b/mercurial/bdiff_module.c
--- a/mercurial/bdiff_module.c
+++ b/mercurial/bdiff_module.c
@@ -66,7 +66,7 @@ static PyObject *bdiff(PyObject *self, P
struct bdiff_line *al, *bl;
struct bdiff_hunk l, *h;
int an, bn, count;
-   Py_ssize_t len = 0, la, lb, li = 0, lcommon = 0, lmax;
+   Py_ssize_t len = 0, la, lb, li = 0, lcommon = 0, lcommonend = 0, lmax;
PyThreadState *_save;
 
l.next = NULL;
@@ -88,9 +88,16 @@ static PyObject *bdiff(PyObject *self, P
if (*ia == '\n')
lcommon = li + 1;
/* we can almost add: if (li == lmax) lcommon = li; */
+   lmax -= lcommon;
+   for (li = 0, ia = sa + la - 1, ib = sb + lb - 1;
+li <= lmax && *ia == *ib;
+++li, --ia, --ib)
+   if (*ia == '\n')
+   lcommonend = li;
+   /* printf("common %i %i\n", lcommon, lcommonend); */
 
-   an = bdiff_splitlines(sa + lcommon, la - lcommon, &al);
-   bn = bdiff_splitlines(sb + lcommon, lb - lcommon, &bl);
+   an = bdiff_splitlines(sa + lcommon, la - lcommon - lcommonend, &al);
+   bn = bdiff_splitlines(sb + lcommon, lb - lcommon - lcommonend, &bl);
if (!al || !bl)
goto nomem;
 
diff --git a/tests/test-bdiff.py.out b/tests/test-bdiff.py.out
--- a/tests/test-bdiff.py.out
+++ b/tests/test-bdiff.py.out
@@ -28,9 +28,9 @@ showdiff(
   'x\n\nx\n\ny\n\nx\n\ny\n\nx\n\nz\n'):
  'x\n\nx\n\n'
  6 6 '' -> 'y\n\n'
- 'x\n\n'
- 9 9 '' -> 'y\n\n'
- 'x\n\nz\n'
+ 'x\n'
+ 8 8 '' -> '\ny\n'
+ '\nx\n\nz\n'
 showdiff(
   'a\nb\nb\nb\nc\n.\nd\ne\n.\nf\n',
   'a\nb\nb\na\nb\nb\nb\nc\n.\nb\nc\n.\nd\ne\nf\n'):
diff --git a/tests/test-check-code.t b/tests/test-check-code.t
--- a/tests/test-check-code.t
+++ b/tests/test-check-code.t
@@ -15,10 +15,6 @@ New errors are not allowed. Warnings are
   Skipping hgext/fsmonitor/pywatchman/msc_stdint.h it has no-che?k-code (glob)
   Skipping hgext/fsmonitor/pywatchman/pybser.py it has no-che?k-code (glob)
   Skipping i18n/polib.py it has no-che?k-code (glob)
-  mercurial/bdiff_module.c:89:
-   >   //printf("common prefix: %i next line: %i\n", li, llf);
-   don't use //-style comments
   Skipping mercurial/httpclient/__init__.py it has no-che?k-code (glob)
   Skipping mercurial/httpclient/_readers.py it has no-che?k-code (glob)
   Skipping mercurial/statprof.py it has no-che?k-code (glob)
-  [1]
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


Re: [PATCH 2 of 2] bdiff: early pruning of common suffix before doing expensive computations

2016-12-15 Thread Pierre-Yves David



On 11/17/2016 06:16 PM, Mads Kiilerich wrote:

# HG changeset patch
# User Mads Kiilerich 
# Date 1479322051 -3600
#  Wed Nov 16 19:47:31 2016 +0100
# Node ID 851a14d5b80639efe61bd01ad215479c99106b40
# Parent  7f39bccc1c96bffc83f3c6e774da57ecd38982a7
bdiff: early pruning of common suffix before doing expensive computations

Similar to the previous change, but trimming trailing lines.


The change itself seems fine and the perf win is very tasty. However, 
this changesets breaks tests with --pure.


I'm tempted to drop it unless we can provide a test update to fold into 
this.


Test failure with --pure:

--- /home/marmoute/src/mercurial-dev/tests/test-bdiff.py.out
+++ /home/marmoute/src/mercurial-dev/tests/test-bdiff.py.err
@@ -28,15 +28,16 @@
   'x\n\nx\n\ny\n\nx\n\ny\n\nx\n\nz\n'):
  'x\n\nx\n\n'
  6 6 '' -> 'y\n\n'
- 'x\n'
- 8 8 '' -> '\ny\n'
- '\nx\n\nz\n'
+ 'x\n\n'
+ 9 9 '' -> 'y\n\n'
+ 'x\n\nz\n'
 showdiff(
   'a\nb\nb\nb\nc\n.\nd\ne\n.\nf\n',
   'a\nb\nb\na\nb\nb\nb\nc\n.\nb\nc\n.\nd\ne\nf\n'):
- 'a\nb\nb\n'
- 6 6 '' -> 'a\nb\nb\nb\nc\n.\n'
- 'b\nc\n.\nd\ne\n'
+ 0 0 '' -> 'a\nb\nb\n'
+ 'a\nb\nb\nb\nc\n.\n'
+ 12 12 '' -> 'b\nc\n.\n'
+ 'd\ne\n'
  16 18 '.\n' -> ''
  'f\n'
 done


On mozilla-unified:
perfbdiff -m 3041e4d59df2
! wall 0.024618 comb 0.02 user 0.02 sys 0.00 (best of 116) to
! wall 0.008259 comb 0.01 user 0.01 sys 0.00 (best of 356) to
perfbdiff 0e9928989e9c --alldata --count 10
! wall 0.579235 comb 0.58 user 0.58 sys 0.00 (best of 18)
! wall 0.469869 comb 0.46 user 0.46 sys 0.00 (best of 22)

diff --git a/mercurial/bdiff_module.c b/mercurial/bdiff_module.c
--- a/mercurial/bdiff_module.c
+++ b/mercurial/bdiff_module.c
@@ -66,7 +66,7 @@ static PyObject *bdiff(PyObject *self, P
struct bdiff_line *al, *bl;
struct bdiff_hunk l, *h;
int an, bn, count;
-   Py_ssize_t len = 0, la, lb, li = 0, lcommon = 0, lmax;
+   Py_ssize_t len = 0, la, lb, li = 0, lcommon = 0, lcommonend = 0, lmax;
PyThreadState *_save;

l.next = NULL;
@@ -88,9 +88,16 @@ static PyObject *bdiff(PyObject *self, P
if (*ia == '\n')
lcommon = li + 1;
/* we can almost add: if (li == lmax) lcommon = li; */
+   lmax -= lcommon;
+   for (li = 0, ia = sa + la - 1, ib = sb + lb - 1;
+li <= lmax && *ia == *ib;
+++li, --ia, --ib)
+   if (*ia == '\n')
+   lcommonend = li;
+   /* printf("common %i %i\n", lcommon, lcommonend); */

-   an = bdiff_splitlines(sa + lcommon, la - lcommon, &al);
-   bn = bdiff_splitlines(sb + lcommon, lb - lcommon, &bl);
+   an = bdiff_splitlines(sa + lcommon, la - lcommon - lcommonend, &al);
+   bn = bdiff_splitlines(sb + lcommon, lb - lcommon - lcommonend, &bl);
if (!al || !bl)
goto nomem;

diff --git a/tests/test-bdiff.py.out b/tests/test-bdiff.py.out
--- a/tests/test-bdiff.py.out
+++ b/tests/test-bdiff.py.out
@@ -28,9 +28,9 @@ showdiff(
   'x\n\nx\n\ny\n\nx\n\ny\n\nx\n\nz\n'):
  'x\n\nx\n\n'
  6 6 '' -> 'y\n\n'
- 'x\n\n'
- 9 9 '' -> 'y\n\n'
- 'x\n\nz\n'
+ 'x\n'
+ 8 8 '' -> '\ny\n'
+ '\nx\n\nz\n'
 showdiff(
   'a\nb\nb\nb\nc\n.\nd\ne\n.\nf\n',
   'a\nb\nb\na\nb\nb\nb\nc\n.\nb\nc\n.\nd\ne\nf\n'):
diff --git a/tests/test-check-code.t b/tests/test-check-code.t
--- a/tests/test-check-code.t
+++ b/tests/test-check-code.t
@@ -15,10 +15,6 @@ New errors are not allowed. Warnings are
   Skipping hgext/fsmonitor/pywatchman/msc_stdint.h it has no-che?k-code (glob)
   Skipping hgext/fsmonitor/pywatchman/pybser.py it has no-che?k-code (glob)
   Skipping i18n/polib.py it has no-che?k-code (glob)
-  mercurial/bdiff_module.c:89:
-   >//printf("common prefix: %i next line: %i\n", li, llf);
-   don't use //-style comments
   Skipping mercurial/httpclient/__init__.py it has no-che?k-code (glob)
   Skipping mercurial/httpclient/_readers.py it has no-che?k-code (glob)
   Skipping mercurial/statprof.py it has no-che?k-code (glob)
-  [1]
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel



--
Pierre-Yves David
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


Re: [PATCH 2 of 2] bdiff: early pruning of common suffix before doing expensive computations

2016-12-15 Thread Yuya Nishihara
On Thu, 15 Dec 2016 09:01:11 +0100, Pierre-Yves David wrote:
> On 11/17/2016 06:16 PM, Mads Kiilerich wrote:
> > # HG changeset patch
> > # User Mads Kiilerich 
> > # Date 1479322051 -3600
> > #  Wed Nov 16 19:47:31 2016 +0100
> > # Node ID 851a14d5b80639efe61bd01ad215479c99106b40
> > # Parent  7f39bccc1c96bffc83f3c6e774da57ecd38982a7
> > bdiff: early pruning of common suffix before doing expensive computations
> >
> > Similar to the previous change, but trimming trailing lines.
> 
> The change itself seems fine and the perf win is very tasty. However, 
> this changesets breaks tests with --pure.
> 
> I'm tempted to drop it unless we can provide a test update to fold into 
> this.

[...]

> > --- a/mercurial/bdiff_module.c
> > +++ b/mercurial/bdiff_module.c
> > @@ -66,7 +66,7 @@ static PyObject *bdiff(PyObject *self, P
> > struct bdiff_line *al, *bl;
> > struct bdiff_hunk l, *h;
> > int an, bn, count;
> > -   Py_ssize_t len = 0, la, lb, li = 0, lcommon = 0, lmax;
> > +   Py_ssize_t len = 0, la, lb, li = 0, lcommon = 0, lcommonend = 0, lmax;
> > PyThreadState *_save;
> >
> > l.next = NULL;
> > @@ -88,9 +88,16 @@ static PyObject *bdiff(PyObject *self, P
> > if (*ia == '\n')
> > lcommon = li + 1;
> > /* we can almost add: if (li == lmax) lcommon = li; */
> > +   lmax -= lcommon;
> > +   for (li = 0, ia = sa + la - 1, ib = sb + lb - 1;
> > +li <= lmax && *ia == *ib;
> > +++li, --ia, --ib)
> > +   if (*ia == '\n')
> > +   lcommonend = li;

And there appears to be an off-by-one error. 'li' is a zero-based index and
'lmax' is a length.

+1 for dropping this for now.
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


Re: [PATCH 2 of 2] bdiff: early pruning of common suffix before doing expensive computations

2016-12-15 Thread Augie Fackler

> On Dec 15, 2016, at 8:02 AM, Yuya Nishihara  wrote:
> 
> On Thu, 15 Dec 2016 09:01:11 +0100, Pierre-Yves David wrote:
>> On 11/17/2016 06:16 PM, Mads Kiilerich wrote:
>>> # HG changeset patch
>>> # User Mads Kiilerich 
>>> # Date 1479322051 -3600
>>> #  Wed Nov 16 19:47:31 2016 +0100
>>> # Node ID 851a14d5b80639efe61bd01ad215479c99106b40
>>> # Parent  7f39bccc1c96bffc83f3c6e774da57ecd38982a7
>>> bdiff: early pruning of common suffix before doing expensive computations
>>> 
>>> Similar to the previous change, but trimming trailing lines.
>> 
>> The change itself seems fine and the perf win is very tasty. However, 
>> this changesets breaks tests with --pure.
>> 
>> I'm tempted to drop it unless we can provide a test update to fold into 
>> this.
> 
> […]

I’d be fine to just #if pure the variations between pure and C, given the 
immense win this provides.

> 
>>> --- a/mercurial/bdiff_module.c
>>> +++ b/mercurial/bdiff_module.c
>>> @@ -66,7 +66,7 @@ static PyObject *bdiff(PyObject *self, P
>>> struct bdiff_line *al, *bl;
>>> struct bdiff_hunk l, *h;
>>> int an, bn, count;
>>> -   Py_ssize_t len = 0, la, lb, li = 0, lcommon = 0, lmax;
>>> +   Py_ssize_t len = 0, la, lb, li = 0, lcommon = 0, lcommonend = 0, lmax;
>>> PyThreadState *_save;
>>> 
>>> l.next = NULL;
>>> @@ -88,9 +88,16 @@ static PyObject *bdiff(PyObject *self, P
>>> if (*ia == '\n')
>>> lcommon = li + 1;
>>> /* we can almost add: if (li == lmax) lcommon = li; */
>>> +   lmax -= lcommon;
>>> +   for (li = 0, ia = sa + la - 1, ib = sb + lb - 1;
>>> +li <= lmax && *ia == *ib;
>>> +++li, --ia, --ib)
>>> +   if (*ia == '\n')
>>> +   lcommonend = li;
> 
> And there appears to be an off-by-one error. 'li' is a zero-based index and
> 'lmax' is a length.
> 
> +1 for dropping this for now.

Given the off-by-one, yeah, drop it. I’m a little surprised the bdiff automated 
stress tester didn’t trip over that.

Sigh.
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


Re: [PATCH 2 of 2] bdiff: early pruning of common suffix before doing expensive computations

2016-12-15 Thread Pierre-Yves David



On 12/15/2016 02:09 PM, Augie Fackler wrote:



On Dec 15, 2016, at 8:02 AM, Yuya Nishihara  wrote:

On Thu, 15 Dec 2016 09:01:11 +0100, Pierre-Yves David wrote:

On 11/17/2016 06:16 PM, Mads Kiilerich wrote:

# HG changeset patch
# User Mads Kiilerich 
# Date 1479322051 -3600
#  Wed Nov 16 19:47:31 2016 +0100
# Node ID 851a14d5b80639efe61bd01ad215479c99106b40
# Parent  7f39bccc1c96bffc83f3c6e774da57ecd38982a7
bdiff: early pruning of common suffix before doing expensive computations

Similar to the previous change, but trimming trailing lines.


The change itself seems fine and the perf win is very tasty. However,
this changesets breaks tests with --pure.

I'm tempted to drop it unless we can provide a test update to fold into
this.


[…]


I’d be fine to just #if pure the variations between pure and C, given the 
immense win this provides.


The previous changesets (already public) is also breaking pure. Can one 
of you submit the necessary test update?



--- a/mercurial/bdiff_module.c
+++ b/mercurial/bdiff_module.c
@@ -66,7 +66,7 @@ static PyObject *bdiff(PyObject *self, P
struct bdiff_line *al, *bl;
struct bdiff_hunk l, *h;
int an, bn, count;
-   Py_ssize_t len = 0, la, lb, li = 0, lcommon = 0, lmax;
+   Py_ssize_t len = 0, la, lb, li = 0, lcommon = 0, lcommonend = 0, lmax;
PyThreadState *_save;

l.next = NULL;
@@ -88,9 +88,16 @@ static PyObject *bdiff(PyObject *self, P
if (*ia == '\n')
lcommon = li + 1;
/* we can almost add: if (li == lmax) lcommon = li; */
+   lmax -= lcommon;
+   for (li = 0, ia = sa + la - 1, ib = sb + lb - 1;
+li <= lmax && *ia == *ib;
+++li, --ia, --ib)
+   if (*ia == '\n')
+   lcommonend = li;


And there appears to be an off-by-one error. 'li' is a zero-based index and
'lmax' is a length.

+1 for dropping this for now.


Given the off-by-one, yeah, drop it. I’m a little surprised the bdiff automated 
stress tester didn’t trip over that.



--
Pierre-Yves David
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


Re: [PATCH 2 of 2] bdiff: early pruning of common suffix before doing expensive computations

2016-12-15 Thread Augie Fackler
On Thu, Dec 15, 2016 at 8:55 AM, Pierre-Yves David
 wrote:
> The previous changesets (already public) is also breaking pure. Can one of
> you submit the necessary test update?

Yeah, I'll draft it and push it within the next hour.
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


Re: [PATCH 2 of 2] bdiff: early pruning of common suffix before doing expensive computations

2016-12-15 Thread Pierre-Yves David



On 12/15/2016 02:59 PM, Augie Fackler wrote:

On Thu, Dec 15, 2016 at 8:55 AM, Pierre-Yves David
 wrote:

The previous changesets (already public) is also breaking pure. Can one of
you submit the necessary test update?


Yeah, I'll draft it and push it within the next hour.


Thanks!

--
Pierre-Yves David
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


Re: [PATCH 2 of 2] bdiff: early pruning of common suffix before doing expensive computations

2016-11-17 Thread Gregory Szorc
On Thu, Nov 17, 2016 at 9:16 AM, Mads Kiilerich  wrote:

> # HG changeset patch
> # User Mads Kiilerich 
> # Date 1479322051 -3600
> #  Wed Nov 16 19:47:31 2016 +0100
> # Node ID 851a14d5b80639efe61bd01ad215479c99106b40
> # Parent  7f39bccc1c96bffc83f3c6e774da57ecd38982a7
> bdiff: early pruning of common suffix before doing expensive computations
>
> Similar to the previous change, but trimming trailing lines.
>
> On mozilla-unified:
> perfbdiff -m 3041e4d59df2
> ! wall 0.024618 comb 0.02 user 0.02 sys 0.00 (best of 116) to
> ! wall 0.008259 comb 0.01 user 0.01 sys 0.00 (best of 356) to
> perfbdiff 0e9928989e9c --alldata --count 10
> ! wall 0.579235 comb 0.58 user 0.58 sys 0.00 (best of 18)
> ! wall 0.469869 comb 0.46 user 0.46 sys 0.00 (best of 22)
>
>
I can collaborate the perf improvements. On mozilla-unified (using a
different clone and machine from what I wrote in my commit messages
reporting bdiff perf - but the CPU model is the same), these 2 patches
result in:

$ perfbdiff -m 3041e4d59df2
! wall 0.040755 comb 0.04 user 0.04 sys 0.00 (best of 100)
! wall 0.007953 comb 0.01 user 0.01 sys 0.00 (best of 304)

$  perfbdiff 0e9928989e9c --alldata --count 100
! wall 4.59 comb 4.58 user 4.58 sys 0.00 (best of 3)
! wall 1.748509 comb 1.75 user 1.75 sys 0.00 (best of 6)

Compared to 4.0:
$ perfbdiff -m 3041e4d59df2
! wall 0.076924 comb 0.08 user 0.08 sys 0.00 (best of 100)
! wall 0.007953 comb 0.01 user 0.01 sys 0.00 (best of 304)

$  perfbdiff 0e9928989e9c --alldata --count 100
! wall 8.892216 comb 8.88 user 8.88 sys 0.00 (best of 3)
! wall 1.748509 comb 1.75 user 1.75 sys 0.00 (best of 6)

Looking at the assembly and profiling output, I believe I've confirmed my
suspicions from my comment on part 1: there is still room to optimize this
code. Again, that can be deferred to a follow-up.


> diff --git a/mercurial/bdiff_module.c b/mercurial/bdiff_module.c
> --- a/mercurial/bdiff_module.c
> +++ b/mercurial/bdiff_module.c
> @@ -66,7 +66,7 @@ static PyObject *bdiff(PyObject *self, P
> struct bdiff_line *al, *bl;
> struct bdiff_hunk l, *h;
> int an, bn, count;
> -   Py_ssize_t len = 0, la, lb, li = 0, lcommon = 0, lmax;
> +   Py_ssize_t len = 0, la, lb, li = 0, lcommon = 0, lcommonend = 0,
> lmax;
> PyThreadState *_save;
>
> l.next = NULL;
> @@ -88,9 +88,16 @@ static PyObject *bdiff(PyObject *self, P
> if (*ia == '\n')
> lcommon = li + 1;
> /* we can almost add: if (li == lmax) lcommon = li; */
> +   lmax -= lcommon;
> +   for (li = 0, ia = sa + la - 1, ib = sb + lb - 1;
> +li <= lmax && *ia == *ib;
> +++li, --ia, --ib)
> +   if (*ia == '\n')
> +   lcommonend = li;
> +   /* printf("common %i %i\n", lcommon, lcommonend); */
>
> -   an = bdiff_splitlines(sa + lcommon, la - lcommon, &al);
> -   bn = bdiff_splitlines(sb + lcommon, lb - lcommon, &bl);
> +   an = bdiff_splitlines(sa + lcommon, la - lcommon - lcommonend,
> &al);
> +   bn = bdiff_splitlines(sb + lcommon, lb - lcommon - lcommonend,
> &bl);
> if (!al || !bl)
> goto nomem;
>
> diff --git a/tests/test-bdiff.py.out b/tests/test-bdiff.py.out
> --- a/tests/test-bdiff.py.out
> +++ b/tests/test-bdiff.py.out
> @@ -28,9 +28,9 @@ showdiff(
>'x\n\nx\n\ny\n\nx\n\ny\n\nx\n\nz\n'):
>   'x\n\nx\n\n'
>   6 6 '' -> 'y\n\n'
> - 'x\n\n'
> - 9 9 '' -> 'y\n\n'
> - 'x\n\nz\n'
> + 'x\n'
> + 8 8 '' -> '\ny\n'
> + '\nx\n\nz\n'
>  showdiff(
>'a\nb\nb\nb\nc\n.\nd\ne\n.\nf\n',
>'a\nb\nb\na\nb\nb\nb\nc\n.\nb\nc\n.\nd\ne\nf\n'):
> diff --git a/tests/test-check-code.t b/tests/test-check-code.t
> --- a/tests/test-check-code.t
> +++ b/tests/test-check-code.t
> @@ -15,10 +15,6 @@ New errors are not allowed. Warnings are
>Skipping hgext/fsmonitor/pywatchman/msc_stdint.h it has no-che?k-code
> (glob)
>Skipping hgext/fsmonitor/pywatchman/pybser.py it has no-che?k-code
> (glob)
>Skipping i18n/polib.py it has no-che?k-code (glob)
> -  mercurial/bdiff_module.c:89:
> -   >   //printf("common prefix: %i next line: %i\n", li, llf);
> -   don't use //-style comments
>Skipping mercurial/httpclient/__init__.py it has no-che?k-code (glob)
>Skipping mercurial/httpclient/_readers.py it has no-che?k-code (glob)
>Skipping mercurial/statprof.py it has no-che?k-code (glob)
> -  [1]
> ___
> Mercurial-devel mailing list
> Mercurial-devel@mercurial-scm.org
> https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
>
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


Re: [PATCH 2 of 2] bdiff: early pruning of common suffix before doing expensive computations

2016-11-17 Thread Gregory Szorc
On Thu, Nov 17, 2016 at 11:26 AM, Gregory Szorc 
wrote:

> On Thu, Nov 17, 2016 at 9:16 AM, Mads Kiilerich 
> wrote:
>
>> # HG changeset patch
>> # User Mads Kiilerich 
>> # Date 1479322051 -3600
>> #  Wed Nov 16 19:47:31 2016 +0100
>> # Node ID 851a14d5b80639efe61bd01ad215479c99106b40
>> # Parent  7f39bccc1c96bffc83f3c6e774da57ecd38982a7
>> bdiff: early pruning of common suffix before doing expensive computations
>>
>> Similar to the previous change, but trimming trailing lines.
>>
>> On mozilla-unified:
>> perfbdiff -m 3041e4d59df2
>> ! wall 0.024618 comb 0.02 user 0.02 sys 0.00 (best of 116) to
>> ! wall 0.008259 comb 0.01 user 0.01 sys 0.00 (best of 356) to
>> perfbdiff 0e9928989e9c --alldata --count 10
>> ! wall 0.579235 comb 0.58 user 0.58 sys 0.00 (best of 18)
>> ! wall 0.469869 comb 0.46 user 0.46 sys 0.00 (best of 22)
>>
>>
> I can collaborate the perf improvements. On mozilla-unified (using a
> different clone and machine from what I wrote in my commit messages
> reporting bdiff perf - but the CPU model is the same), these 2 patches
> result in:
>
> $ perfbdiff -m 3041e4d59df2
> ! wall 0.040755 comb 0.04 user 0.04 sys 0.00 (best of 100)
> ! wall 0.007953 comb 0.01 user 0.01 sys 0.00 (best of 304)
>
> $  perfbdiff 0e9928989e9c --alldata --count 100
> ! wall 4.59 comb 4.58 user 4.58 sys 0.00 (best of 3)
> ! wall 1.748509 comb 1.75 user 1.75 sys 0.00 (best of 6)
>
> Compared to 4.0:
> $ perfbdiff -m 3041e4d59df2
> ! wall 0.076924 comb 0.08 user 0.08 sys 0.00 (best of 100)
> ! wall 0.007953 comb 0.01 user 0.01 sys 0.00 (best of 304)
>
> $  perfbdiff 0e9928989e9c --alldata --count 100
> ! wall 8.892216 comb 8.88 user 8.88 sys 0.00 (best of 3)
> ! wall 1.748509 comb 1.75 user 1.75 sys 0.00 (best of 6)
>
> Looking at the assembly and profiling output, I believe I've confirmed my
> suspicions from my comment on part 1: there is still room to optimize this
> code. Again, that can be deferred to a follow-up.
>

It's worth looking at where we're spending CPU time after this series.
Measuring `perfbdiff 0e9928989e9c --alldata --count 100`:

This series
40% bdiff_module.c:bdiff()
38% bidff.c:bdiff_splitlines()
12% bdiff.c:bdiff_diff()
 5% __memcmp_sse4_1 (not sure where exactly this is called from)
 4% bdiff.c:recurse()

Before prefix and suffix matching:
62% bdiff.c:bdiff_splitlines()
23% bdiff.c.bdiff_diff()
 8% memcmp_sse4_1
 5% bdiff.c:recurse()

4.0
82% bdiff.c:bdiff_splitlines()
10% bdiff.c:bdiff_diff()
 4.5% __memcmp_sse4_1
 2.5% bdiff.c:recurse()

This series shifted time out of bdiff_splitlines() *and* bdiff_diff().
Furthermore, from 4.0 to this series, the ratio of time spent before
bdiff_diff() and inside is roughly the same, despite things getting 5-10x
faster in that time! It is alarming we're still spending so much time in
what is effectively set-up to run an algorithm. But this is also a good
thing: we know the setup code can still be optimized which means there are
still significant perf wins on the table.


>
>> diff --git a/mercurial/bdiff_module.c b/mercurial/bdiff_module.c
>> --- a/mercurial/bdiff_module.c
>> +++ b/mercurial/bdiff_module.c
>> @@ -66,7 +66,7 @@ static PyObject *bdiff(PyObject *self, P
>> struct bdiff_line *al, *bl;
>> struct bdiff_hunk l, *h;
>> int an, bn, count;
>> -   Py_ssize_t len = 0, la, lb, li = 0, lcommon = 0, lmax;
>> +   Py_ssize_t len = 0, la, lb, li = 0, lcommon = 0, lcommonend = 0,
>> lmax;
>> PyThreadState *_save;
>>
>> l.next = NULL;
>> @@ -88,9 +88,16 @@ static PyObject *bdiff(PyObject *self, P
>> if (*ia == '\n')
>> lcommon = li + 1;
>> /* we can almost add: if (li == lmax) lcommon = li; */
>> +   lmax -= lcommon;
>> +   for (li = 0, ia = sa + la - 1, ib = sb + lb - 1;
>> +li <= lmax && *ia == *ib;
>> +++li, --ia, --ib)
>> +   if (*ia == '\n')
>> +   lcommonend = li;
>> +   /* printf("common %i %i\n", lcommon, lcommonend); */
>>
>> -   an = bdiff_splitlines(sa + lcommon, la - lcommon, &al);
>> -   bn = bdiff_splitlines(sb + lcommon, lb - lcommon, &bl);
>> +   an = bdiff_splitlines(sa + lcommon, la - lcommon - lcommonend,
>> &al);
>> +   bn = bdiff_splitlines(sb + lcommon, lb - lcommon - lcommonend,
>> &bl);
>> if (!al || !bl)
>> goto nomem;
>>
>> diff --git a/tests/test-bdiff.py.out b/tests/test-bdiff.py.out
>> --- a/tests/test-bdiff.py.out
>> +++ b/tests/test-bdiff.py.out
>> @@ -28,9 +28,9 @@ showdiff(
>>'x\n\nx\n\ny\n\nx\n\ny\n\nx\n\nz\n'):
>>   'x\n\nx\n\n'
>>   6 6 '' -> 'y\n\n'
>> - 'x\n\n'
>> - 9 9 '' -> 'y\n\n'
>> - 'x\n\nz\n'
>> + 'x\n'
>> + 8 8 '' -> '\ny\n'
>> + '\nx\n\nz\n'
>>  showdiff(
>>'a\nb\nb\nb\nc\n.\nd\ne\n.\nf\n',
>>'a\nb\nb\na\nb\nb\nb\nc\n.\nb\nc\n.\nd\ne\nf\n'):
>> diff --

Re: [PATCH 2 of 2] bdiff: early pruning of common suffix before doing expensive computations

2016-12-06 Thread Yuya Nishihara
On Thu, 17 Nov 2016 18:16:40 +0100, Mads Kiilerich wrote:
> # HG changeset patch
> # User Mads Kiilerich 
> # Date 1479322051 -3600
> #  Wed Nov 16 19:47:31 2016 +0100
> # Node ID 851a14d5b80639efe61bd01ad215479c99106b40
> # Parent  7f39bccc1c96bffc83f3c6e774da57ecd38982a7
> bdiff: early pruning of common suffix before doing expensive computations
> 
> Similar to the previous change, but trimming trailing lines.
> 
> On mozilla-unified:
> perfbdiff -m 3041e4d59df2
> ! wall 0.024618 comb 0.02 user 0.02 sys 0.00 (best of 116) to
> ! wall 0.008259 comb 0.01 user 0.01 sys 0.00 (best of 356) to
> perfbdiff 0e9928989e9c --alldata --count 10
> ! wall 0.579235 comb 0.58 user 0.58 sys 0.00 (best of 18)
> ! wall 0.469869 comb 0.46 user 0.46 sys 0.00 (best of 22)
> 
> diff --git a/mercurial/bdiff_module.c b/mercurial/bdiff_module.c
> --- a/mercurial/bdiff_module.c
> +++ b/mercurial/bdiff_module.c
> @@ -66,7 +66,7 @@ static PyObject *bdiff(PyObject *self, P
>   struct bdiff_line *al, *bl;
>   struct bdiff_hunk l, *h;
>   int an, bn, count;
> - Py_ssize_t len = 0, la, lb, li = 0, lcommon = 0, lmax;
> + Py_ssize_t len = 0, la, lb, li = 0, lcommon = 0, lcommonend = 0, lmax;
>   PyThreadState *_save;
>  
>   l.next = NULL;
> @@ -88,9 +88,16 @@ static PyObject *bdiff(PyObject *self, P
>   if (*ia == '\n')
>   lcommon = li + 1;
>   /* we can almost add: if (li == lmax) lcommon = li; */
> + lmax -= lcommon;
> + for (li = 0, ia = sa + la - 1, ib = sb + lb - 1;
> +  li <= lmax && *ia == *ib;
> +  ++li, --ia, --ib)
> + if (*ia == '\n')
> + lcommonend = li;

Perhaps this could read one byte before the sa and sb. For bdiff('', ''),
li == lmax == 0 but ia == sa[-1].
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel