[PATCH 01 of 14] sparse-revlog: skip the span check in the sparse-revlog case

2018-11-12 Thread Boris Feld
# HG changeset patch
# User Boris Feld 
# Date 1539611108 -7200
#  Mon Oct 15 15:45:08 2018 +0200
# Node ID 1da2545f2fb4468b394bafe644223080fa8ccd1f
# Parent  2ad56a9b983b3c3e70cdd4239af27a9d9c31d33e
# EXP-Topic sparse-perf
# Available At https://bitbucket.org/octobus/mercurial-devel/
#  hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 
1da2545f2fb4
sparse-revlog: skip the span check in the sparse-revlog case

This significantly improves the performance of unbundling on smaller
repositories.

Mercurial: unbundling 1K revisions

no-sparse-revlog: 500 ms
sparse-revlog-before: 689 ms
sparse-revlog-after:  484 ms

Pypy: unbundling 1K revisions

no-sparse-revlog: 1.242 s
sparse-revlog-before: 1.135 s
sparse-revlog-after:  0.860 s

NetBeans: unbundling 1K revisions

no-sparse-revlog: 1.386 s
sparse-revlog-before: 2.368 s
sparse-revlog-after:  1.191 s

Mozilla: unbundling 1K revisions

no-sparse-revlog: 3.103 s
sparse-revlog-before: 3.367 s
sparse-revlog-after:  3.093 s

diff --git a/mercurial/revlogutils/deltas.py b/mercurial/revlogutils/deltas.py
--- a/mercurial/revlogutils/deltas.py
+++ b/mercurial/revlogutils/deltas.py
@@ -489,45 +489,23 @@ def isgooddeltainfo(revlog, deltainfo, r
 #   deltas we need to apply -- bounding it limits the amount of CPU
 #   we consume.
 
-if revlog._sparserevlog:
-# As sparse-read will be used, we can consider that the distance,
-# instead of being the span of the whole chunk,
-# is the span of the largest read chunk
-base = deltainfo.base
-
-if base != nullrev:
-deltachain = revlog._deltachain(base)[0]
-else:
-deltachain = []
-
-# search for the first non-snapshot revision
-for idx, r in enumerate(deltachain):
-if not revlog.issnapshot(r):
-break
-deltachain = deltachain[idx:]
-chunks = slicechunk(revlog, deltachain, deltainfo)
-all_span = [segmentspan(revlog, revs, deltainfo)
-for revs in chunks]
-distance = max(all_span)
-else:
-distance = deltainfo.distance
-
 textlen = revinfo.textlen
 defaultmax = textlen * 4
 maxdist = revlog._maxdeltachainspan
 if not maxdist:
-maxdist = distance # ensure the conditional pass
+maxdist = deltainfo.distance # ensure the conditional pass
 maxdist = max(maxdist, defaultmax)
-if revlog._sparserevlog and maxdist < revlog._srmingapsize:
-# In multiple place, we are ignoring irrelevant data range below a
-# certain size. Be also apply this tradeoff here and relax span
-# constraint for small enought content.
-maxdist = revlog._srmingapsize
 
 # Bad delta from read span:
 #
 #   If the span of data read is larger than the maximum allowed.
-if maxdist < distance:
+#
+#   In the sparse-revlog case, we rely on the associated "sparse reading"
+#   to avoid issue related to the span of data. In theory, it would be
+#   possible to build pathological revlog where delta pattern would lead
+#   to too many reads. However, they do not happen in practice at all. So
+#   we skip the span check entirely.
+if not revlog._sparserevlog and maxdist < deltainfo.distance:
 return False
 
 # Bad delta from new delta size:
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


[PATCH 02 of 14] sparse-revlog: drop unused deltainfo parameter from _slicechunktodensity

2018-11-12 Thread Boris Feld
# HG changeset patch
# User Boris Feld 
# Date 1539693831 -7200
#  Tue Oct 16 14:43:51 2018 +0200
# Node ID 1db931d5c9bbec37af644f4e2fafaf9cc292c9d7
# Parent  1da2545f2fb4468b394bafe644223080fa8ccd1f
# EXP-Topic sparse-perf
# Available At https://bitbucket.org/octobus/mercurial-devel/
#  hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 
1db931d5c9bb
sparse-revlog: drop unused deltainfo parameter from _slicechunktodensity

We no longer need to deal with slicing logic that includes an uncommitted
revision, so we drop the associated code.

diff --git a/mercurial/revlogutils/deltas.py b/mercurial/revlogutils/deltas.py
--- a/mercurial/revlogutils/deltas.py
+++ b/mercurial/revlogutils/deltas.py
@@ -59,7 +59,7 @@ class _testrevlog(object):
 def __len__(self):
 return len(self._data)
 
-def slicechunk(revlog, revs, deltainfo=None, targetsize=None):
+def slicechunk(revlog, revs, targetsize=None):
 """slice revs to reduce the amount of unrelated data to be read from disk.
 
 ``revs`` is sliced into groups that should be read in one time.
@@ -116,12 +116,7 @@ def slicechunk(revlog, revs, deltainfo=N
 targetsize = max(targetsize, revlog._srmingapsize)
 # targetsize should not be specified when evaluating delta candidates:
 # * targetsize is used to ensure we stay within specification when reading,
-# * deltainfo is used to pick are good delta chain when writing.
-if not (deltainfo is None or targetsize is None):
-msg = 'cannot use `targetsize` with a `deltainfo`'
-raise error.ProgrammingError(msg)
 for chunk in _slicechunktodensity(revlog, revs,
-  deltainfo,
   revlog._srdensitythreshold,
   revlog._srmingapsize):
 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
@@ -204,16 +199,13 @@ def _slicechunktosize(revlog, revs, targ
 endrevidx = idx
 yield _trimchunk(revlog, revs, startrevidx)
 
-def _slicechunktodensity(revlog, revs, deltainfo=None, targetdensity=0.5,
+def _slicechunktodensity(revlog, revs, targetdensity=0.5,
  mingapsize=0):
 """slice revs to reduce the amount of unrelated data to be read from disk.
 
 ``revs`` is sliced into groups that should be read in one time.
 Assume that revs are sorted.
 
-``deltainfo`` is a _deltainfo instance of a revision that we would append
-to the top of the revlog.
-
 The initial chunk is sliced until the overall density (payload/chunks-span
 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
 skipped.
@@ -264,15 +256,8 @@ def _slicechunktodensity(revlog, revs, d
 yield revs
 return
 
-nextrev = len(revlog)
-nextoffset = revlog.end(nextrev - 1)
-
-if deltainfo is None:
-deltachainspan = segmentspan(revlog, revs)
-chainpayload = sum(length(r) for r in revs)
-else:
-deltachainspan = deltainfo.distance
-chainpayload = deltainfo.compresseddeltalen
+deltachainspan = segmentspan(revlog, revs)
+chainpayload = sum(length(r) for r in revs)
 
 if deltachainspan < mingapsize:
 yield revs
@@ -289,21 +274,13 @@ def _slicechunktodensity(revlog, revs, d
 yield revs
 return
 
-if deltainfo is not None and deltainfo.deltalen:
-revs = list(revs)
-revs.append(nextrev)
-
 # Store the gaps in a heap to have them sorted by decreasing size
 gapsheap = []
 heapq.heapify(gapsheap)
 prevend = None
 for i, rev in enumerate(revs):
-if rev < nextrev:
-revstart = start(rev)
-revlen = length(rev)
-else:
-revstart = nextoffset
-revlen = deltainfo.deltalen
+revstart = start(rev)
+revlen = length(rev)
 
 # Skip empty revisions to form larger holes
 if revlen == 0:
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


[PATCH 04 of 14] sparse-revlog: fast-path before computing payload size

2018-11-12 Thread Boris Feld
# HG changeset patch
# User Boris Feld 
# Date 1541687398 -3600
#  Thu Nov 08 15:29:58 2018 +0100
# Node ID 3bdd984df153304a956d31d2f3fbc3cd7f0e41c2
# Parent  c8eba56f115019badd26223e504ca4899210075a
# EXP-Topic sparse-perf
# Available At https://bitbucket.org/octobus/mercurial-devel/
#  hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 
3bdd984df153
sparse-revlog: fast-path before computing payload size

In this fast-path case, we do not need to compute the full delta chain
payload. Since it comes with a significant cost, we avoid doing so if
possible.

diff --git a/mercurial/revlogutils/deltas.py b/mercurial/revlogutils/deltas.py
--- a/mercurial/revlogutils/deltas.py
+++ b/mercurial/revlogutils/deltas.py
@@ -257,13 +257,13 @@ def _slicechunktodensity(revlog, revs, t
 return
 
 deltachainspan = segmentspan(revlog, revs)
-chainpayload = sum(length(r) for r in revs)
 
 if deltachainspan < mingapsize:
 yield revs
 return
 
 readdata = deltachainspan
+chainpayload = sum(length(r) for r in revs)
 
 if deltachainspan:
 density = chainpayload / float(deltachainspan)
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


[PATCH 03 of 14] sparse-revlog: drop unused deltainfo parameter from segmentspan

2018-11-12 Thread Boris Feld
# HG changeset patch
# User Boris Feld 
# Date 1539693988 -7200
#  Tue Oct 16 14:46:28 2018 +0200
# Node ID c8eba56f115019badd26223e504ca4899210075a
# Parent  1db931d5c9bbec37af644f4e2fafaf9cc292c9d7
# EXP-Topic sparse-perf
# Available At https://bitbucket.org/octobus/mercurial-devel/
#  hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 
c8eba56f1150
sparse-revlog: drop unused deltainfo parameter from segmentspan

We no longer need to deal with slicing logic that includes an uncommitted
revision, so we drop the associated code.

diff --git a/mercurial/revlogutils/deltas.py b/mercurial/revlogutils/deltas.py
--- a/mercurial/revlogutils/deltas.py
+++ b/mercurial/revlogutils/deltas.py
@@ -378,7 +378,7 @@ def _trimchunk(revlog, revs, startidx, e
 
 return revs[startidx:endidx]
 
-def segmentspan(revlog, revs, deltainfo=None):
+def segmentspan(revlog, revs):
 """Get the byte span of a segment of revisions
 
 revs is a sorted array of revision numbers
@@ -404,13 +404,7 @@ def segmentspan(revlog, revs, deltainfo=
 """
 if not revs:
 return 0
-if deltainfo is not None and len(revlog) <= revs[-1]:
-if len(revs) == 1:
-return deltainfo.deltalen
-offset = revlog.end(len(revlog) - 1)
-end = deltainfo.deltalen + offset
-else:
-end = revlog.end(revs[-1])
+end = revlog.end(revs[-1])
 return end - revlog.start(revs[0])
 
 def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


[PATCH 05 of 14] sparse-revlog: stop using a heap to track gaps

2018-11-12 Thread Boris Feld
# HG changeset patch
# User Boris Feld 
# Date 1541689290 -3600
#  Thu Nov 08 16:01:30 2018 +0100
# Node ID 8ebe5520cc4ae87f6fccba20897d292489a651db
# Parent  3bdd984df153304a956d31d2f3fbc3cd7f0e41c2
# EXP-Topic sparse-perf
# Available At https://bitbucket.org/octobus/mercurial-devel/
#  hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 
8ebe5520cc4a
sparse-revlog: stop using a heap to track gaps

The heap doesn't bring any performance advantage as we can simply sort the
final list.

Moreover, the lesser complexity helps a lot when we later implement it in C.

diff --git a/mercurial/revlogutils/deltas.py b/mercurial/revlogutils/deltas.py
--- a/mercurial/revlogutils/deltas.py
+++ b/mercurial/revlogutils/deltas.py
@@ -275,8 +275,7 @@ def _slicechunktodensity(revlog, revs, t
 return
 
 # Store the gaps in a heap to have them sorted by decreasing size
-gapsheap = []
-heapq.heapify(gapsheap)
+gaps = []
 prevend = None
 for i, rev in enumerate(revs):
 revstart = start(rev)
@@ -290,21 +289,23 @@ def _slicechunktodensity(revlog, revs, t
 gapsize = revstart - prevend
 # only consider holes that are large enough
 if gapsize > mingapsize:
-heapq.heappush(gapsheap, (-gapsize, i))
+gaps.append((gapsize, i))
 
 prevend = revstart + revlen
+# sort the gaps to pop them from largest to small
+gaps.sort()
 
 # Collect the indices of the largest holes until the density is acceptable
 indicesheap = []
 heapq.heapify(indicesheap)
-while gapsheap and density < targetdensity:
-oppgapsize, gapidx = heapq.heappop(gapsheap)
+while gaps and density < targetdensity:
+gapsize, gapidx = gaps.pop()
 
 heapq.heappush(indicesheap, gapidx)
 
 # the gap sizes are stored as negatives to be sorted decreasingly
 # by the heap
-readdata -= (-oppgapsize)
+readdata -= gapsize
 if readdata > 0:
 density = chainpayload / float(readdata)
 else:
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


[PATCH 06 of 14] sparse-revlog: stop using a heap to track selected gap

2018-11-12 Thread Boris Feld
# HG changeset patch
# User Boris Feld 
# Date 1541689636 -3600
#  Thu Nov 08 16:07:16 2018 +0100
# Node ID ddafb271512fc26de60da5dceffc1509bb023d66
# Parent  8ebe5520cc4ae87f6fccba20897d292489a651db
# EXP-Topic sparse-perf
# Available At https://bitbucket.org/octobus/mercurial-devel/
#  hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 
ddafb271512f
sparse-revlog: stop using a heap to track selected gap

Same logic as for 'gapsheap', we don't actually need a heap.

diff --git a/mercurial/revlogutils/deltas.py b/mercurial/revlogutils/deltas.py
--- a/mercurial/revlogutils/deltas.py
+++ b/mercurial/revlogutils/deltas.py
@@ -10,7 +10,6 @@
 from __future__ import absolute_import
 
 import collections
-import heapq
 import struct
 
 # import stuff from node for others to import from revlog
@@ -296,12 +295,11 @@ def _slicechunktodensity(revlog, revs, t
 gaps.sort()
 
 # Collect the indices of the largest holes until the density is acceptable
-indicesheap = []
-heapq.heapify(indicesheap)
+selected = []
 while gaps and density < targetdensity:
 gapsize, gapidx = gaps.pop()
 
-heapq.heappush(indicesheap, gapidx)
+selected.append(gapidx)
 
 # the gap sizes are stored as negatives to be sorted decreasingly
 # by the heap
@@ -310,11 +308,11 @@ def _slicechunktodensity(revlog, revs, t
 density = chainpayload / float(readdata)
 else:
 density = 1.0
+selected.sort()
 
 # Cut the revs at collected indices
 previdx = 0
-while indicesheap:
-idx = heapq.heappop(indicesheap)
+for idx in selected:
 
 chunk = _trimchunk(revlog, revs, previdx, idx)
 if chunk:
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


[PATCH 07 of 14] sparse-revlog: rework the way we enforce chunk size limit

2018-11-12 Thread Boris Feld
# HG changeset patch
# User Boris Feld 
# Date 1541782717 -3600
#  Fri Nov 09 17:58:37 2018 +0100
# Node ID b77a6b74ef31e1b3706c1c6127a15eede0334f71
# Parent  ddafb271512fc26de60da5dceffc1509bb023d66
# EXP-Topic sparse-perf
# Available At https://bitbucket.org/octobus/mercurial-devel/
#  hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 
b77a6b74ef31
sparse-revlog: rework the way we enforce chunk size limit

We move from an O(N) algorithm to an O(log(N)) algorithm.

The previous algorithm was traversing the whole delta chain, looking for the
exact point where it became too big. This resulted in most of the delta
chain being traversed.

Instead, we now use a "binary" approach, slicing the chain in two until we
have a chunk of the appropriate size.

We still keep the previous algorithm for the snapshots part. There are few of
them and they are large bits of data distant from each other. So the previous
algorithm should work well in that case.

As a practical example, consider restoring manifest revision '59547c40bc4c'
for a reference NetBeans repository (using sparse-revlog): the median time of
the `slice-sparse-chain` step of `perfrevlogrevision` improves from 1.109 ms
to 0.660 ms.

diff --git a/mercurial/revlogutils/deltas.py b/mercurial/revlogutils/deltas.py
--- a/mercurial/revlogutils/deltas.py
+++ b/mercurial/revlogutils/deltas.py
@@ -176,18 +176,22 @@ def _slicechunktosize(revlog, revs, targ
 [[3], [5]]
 """
 assert targetsize is None or 0 <= targetsize
-if targetsize is None or segmentspan(revlog, revs) <= targetsize:
+startdata = revlog.start(revs[0])
+enddata = revlog.end(revs[-1])
+fullspan = enddata - startdata
+if targetsize is None or fullspan <= targetsize:
 yield revs
 return
 
 startrevidx = 0
-startdata = revlog.start(revs[0])
 endrevidx = 0
 iterrevs = enumerate(revs)
 next(iterrevs) # skip first rev.
+# first step: get snapshots out of the way
 for idx, r in iterrevs:
 span = revlog.end(r) - startdata
-if span <= targetsize:
+snapshot = revlog.issnapshot(r)
+if span <= targetsize and snapshot:
 endrevidx = idx
 else:
 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
@@ -196,6 +200,29 @@ def _slicechunktosize(revlog, revs, targ
 startrevidx = idx
 startdata = revlog.start(r)
 endrevidx = idx
+if not snapshot:
+break
+
+# for the others, we use binary slicing to quickly converge toward valid
+# chunks (otherwise, we might end up looking for start/end of many
+# revisions)
+nbitem = len(revs)
+while (enddata - startdata) > targetsize:
+endrevidx = nbitem
+if nbitem - startrevidx <= 1:
+break # protect against individual chunk larger than limit
+localenddata = revlog.end(revs[endrevidx - 1])
+span = localenddata - startdata
+while (localenddata - startdata) > targetsize:
+if endrevidx - startrevidx <= 1:
+break # protect against individual chunk larger than limit
+endrevidx -= (endrevidx - startrevidx) // 2
+localenddata = revlog.end(revs[endrevidx -1])
+span = localenddata - startdata
+yield _trimchunk(revlog, revs, startrevidx, endrevidx)
+startrevidx = endrevidx
+startdata = revlog.start(revs[startrevidx])
+
 yield _trimchunk(revlog, revs, startrevidx)
 
 def _slicechunktodensity(revlog, revs, targetdensity=0.5,
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


[PATCH 08 of 14] sparse-revlog: add a `index_get_start` function in C

2018-11-12 Thread Boris Feld
# HG changeset patch
# User Boris Feld 
# Date 1541785348 -3600
#  Fri Nov 09 18:42:28 2018 +0100
# Node ID 524f8117280d1a36301653c463f969fbc2391a7c
# Parent  b77a6b74ef31e1b3706c1c6127a15eede0334f71
# EXP-Topic sparse-perf
# Available At https://bitbucket.org/octobus/mercurial-devel/
#  hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 
524f8117280d
sparse-revlog: add a `index_get_start` function in C

We are about to implement a native version of `slicechunktodensity`. For
clarity, we introduce the helper functions first. This new function provides
an efficient way to retrieve some of the information needed by
`slicechunktodensity`.

diff --git a/mercurial/cext/revlog.c b/mercurial/cext/revlog.c
--- a/mercurial/cext/revlog.c
+++ b/mercurial/cext/revlog.c
@@ -179,6 +179,27 @@ static inline int index_get_parents(inde
return 0;
 }
 
+static inline uint64_t index_get_start(indexObject *self, Py_ssize_t rev)
+{
+   uint64_t offset;
+   if (rev >= self->length) {
+   PyObject *tuple =
+   PyList_GET_ITEM(self->added, rev - self->length);
+   offset = (uint64_t)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 0));
+   } else {
+   const char *data = index_deref(self, rev);
+   offset = getbe32(data + 4);
+   if (rev == 0) /* mask out version number for the first entry */
+   offset &= 0xFFFF;
+   else {
+   uint32_t offset_high = getbe32(data);
+   offset |= ((uint64_t)offset_high) << 32;
+   }
+   }
+   offset = offset >> 16;
+   return offset;
+}
+
 /*
  * RevlogNG format (all in big endian, data may be inlined):
  *6 bytes: offset
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


[PATCH 09 of 14] sparse-revlog: add a `index_get_length` function in C

2018-11-12 Thread Boris Feld
# HG changeset patch
# User Boris Feld 
# Date 1541785378 -3600
#  Fri Nov 09 18:42:58 2018 +0100
# Node ID ef4813b8b4911e5a73309b41f1f7496983792b64
# Parent  524f8117280d1a36301653c463f969fbc2391a7c
# EXP-Topic sparse-perf
# Available At https://bitbucket.org/octobus/mercurial-devel/
#  hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 
ef4813b8b491
sparse-revlog: add a `index_get_length` function in C

We are about to implement a native version of `slicechunktodensity`. For
clarity, we introduce the helper functions first. This new function provides
an efficient way to retrieve some of the information needed by
`slicechunktodensity`.

diff --git a/mercurial/cext/revlog.c b/mercurial/cext/revlog.c
--- a/mercurial/cext/revlog.c
+++ b/mercurial/cext/revlog.c
@@ -200,6 +200,18 @@ static inline uint64_t index_get_start(i
return offset;
 }
 
+static inline int index_get_length(indexObject *self, Py_ssize_t rev)
+{
+   if (rev >= self->length) {
+   PyObject *tuple =
+   PyList_GET_ITEM(self->added, rev - self->length);
+   return (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 1));
+   } else {
+   const char *data = index_deref(self, rev);
+   return getbe32(data + 8);
+   }
+}
+
 /*
  * RevlogNG format (all in big endian, data may be inlined):
  *6 bytes: offset
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


[PATCH 10 of 14] sparse-revlog: add a `index_segment_span` function in C

2018-11-12 Thread Boris Feld
# HG changeset patch
# User Boris Feld 
# Date 1541785396 -3600
#  Fri Nov 09 18:43:16 2018 +0100
# Node ID 036a7425b869b5b8e3e7f528ed6d992c48ebb52e
# Parent  ef4813b8b4911e5a73309b41f1f7496983792b64
# EXP-Topic sparse-perf
# Available At https://bitbucket.org/octobus/mercurial-devel/
#  hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 
036a7425b869
sparse-revlog: add a `index_segment_span` function in C

We are about to implement a native version of `slicechunktodensity`. For
clarity, we introduce the helper functions first. This new function provides
an efficient way to retrieve some of the information needed by
`slicechunktodensity`.

diff --git a/mercurial/cext/revlog.c b/mercurial/cext/revlog.c
--- a/mercurial/cext/revlog.c
+++ b/mercurial/cext/revlog.c
@@ -1010,6 +1010,14 @@ bail:
return NULL;
 }
 
+static inline long index_segment_span(indexObject *self, Py_ssize_t start_rev,
+  Py_ssize_t end_rev)
+{
+   return (index_get_start(self, end_rev) -
+   index_get_start(self, start_rev) +
+   index_get_length(self, end_rev));
+}
+
 static inline int nt_level(const char *node, Py_ssize_t level)
 {
int v = node[level >> 1];
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


[PATCH 11 of 14] sparse-revlog: add a `_trimchunk` function in C

2018-11-12 Thread Boris Feld
# HG changeset patch
# User Boris Feld 
# Date 1541785523 -3600
#  Fri Nov 09 18:45:23 2018 +0100
# Node ID 0ea42453fa491793d1e145f5093b65e84cb65e97
# Parent  036a7425b869b5b8e3e7f528ed6d992c48ebb52e
# EXP-Topic sparse-perf
# Available At https://bitbucket.org/octobus/mercurial-devel/
#  hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 
0ea42453fa49
sparse-revlog: add a `_trimchunk` function in C

We are about to implement a native version of `slicechunktodensity`. For
clarity, we introduce the helper functions first.

This function is a native implementation of the python function `_trimchunk`
in `mercurial/revlogutils/deltas.py`.

diff --git a/mercurial/cext/revlog.c b/mercurial/cext/revlog.c
--- a/mercurial/cext/revlog.c
+++ b/mercurial/cext/revlog.c
@@ -1018,6 +1018,29 @@ static inline long index_segment_span(in
index_get_length(self, end_rev));
 }
 
+/* returns revs[startidx:endidx] without empty trailing revs */
+static PyObject *_trimchunk(indexObject *self, PyObject *revs, long startidx,
+long endidx)
+{
+   while (endidx > 1 && endidx > startidx) {
+   PyObject *rev = PyList_GET_ITEM(revs, endidx - 1);
+   if (rev == NULL) {
+   return NULL;
+   }
+   Py_ssize_t r = PyInt_AsLong(rev);
+   if (r == -1 && PyErr_Occurred()) {
+   return NULL;
+   }
+   if (index_get_length(self, r - 1) != 0) {
+   break;
+   }
+   endidx -= 1;
+   }
+   PyObject *chunk = PyList_GetSlice(revs, startidx, endidx);
+   Py_INCREF(chunk);
+   return chunk;
+}
+
 static inline int nt_level(const char *node, Py_ssize_t level)
 {
int v = node[level >> 1];
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


[PATCH 12 of 14] sparse-revlog: introduce native (C) implementation of slicechunktodensity

2018-11-12 Thread Boris Feld
# HG changeset patch
# User Boris Feld 
# Date 1541785632 -3600
#  Fri Nov 09 18:47:12 2018 +0100
# Node ID 4a1104eade1dfb1697517d60d2c5fd7a98b8c7f0
# Parent  0ea42453fa491793d1e145f5093b65e84cb65e97
# EXP-Topic sparse-perf
# Available At https://bitbucket.org/octobus/mercurial-devel/
#  hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 
4a1104eade1d
sparse-revlog: introduce native (C) implementation of slicechunktodensity

This is a C implementation of `_slicechunktodensity` in the
`mercurial/revlogutils/deltas.py` file.

The algorithm involves a lot of integer manipulation and low-level access to
index data. Having a C implementation of it yields a large performance
improvement. See a later changeset in this series for details.

diff --git a/mercurial/cext/revlog.c b/mercurial/cext/revlog.c
--- a/mercurial/cext/revlog.c
+++ b/mercurial/cext/revlog.c
@@ -11,6 +11,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #include "bitmanipulation.h"
@@ -1041,6 +1042,197 @@ static PyObject *_trimchunk(indexObject 
return chunk;
 }
 
+struct Gap {
+   long size;
+   long idx;
+};
+
+static int gap_compare(const void *left, const void *right)
+{
+   return ((struct Gap *)left)->size - ((struct Gap *)right)->size;
+}
+static int long_compare(const void *left, const void *right)
+{
+   return *(long *)left - *(long *)right;
+}
+
+static PyObject *index_slicechunktodensity(indexObject *self, PyObject *args)
+{
+   /* method arguments */
+   PyObject *list_revs = NULL; /* revisions in the chain */
+   double targetdensity = 0.5; /* min density to achieve */
+   long mingapsize = 0;/* threshold to ignore gaps */
+
+   /* other core variables */
+   long i;  /* used for various iteration */
+   PyObject *result = NULL; /* the final return of the function */
+
+   /* generic information about the delta chain being slice */
+   Py_ssize_t num_revs = 0; /* size of the full delta chain */
+   Py_ssize_t *revs = NULL; /* native array of revision in the chain */
+   long chainpayload = 0;   /* sum of all delta in the chain */
+   long deltachainspan = 0; /* distance from first byte to last byte */
+
+   /* variable used for slicing the delta chain */
+   long readdata = 0;  /* amount of data currently planned to be read */
+   double density = 0; /* ration of payload data compared to read ones */
+   struct Gap *gaps = NULL; /* array of notable gap in the chain */
+   long num_gaps = 0; /* total number of notable gap recorded so far */
+   Py_ssize_t *selected_indices = NULL; /* indices of gap skipped over */
+   long num_selected = 0;   /* number of gaps skipped */
+   PyObject *chunk = NULL;  /* individual slice */
+   PyObject *allchunks = PyList_New(num_selected); /* all slices */
+
+   /* parsing argument */
+   if (!PyArg_ParseTuple(args, "O!dl", &PyList_Type, &list_revs,
+ &targetdensity, &mingapsize)) {
+   goto bail;
+   }
+
+   /* If the delta chain contains a single element, we do not need slicing
+*/
+   num_revs = PyList_GET_SIZE(list_revs);
+   if (num_revs <= 1) {
+   result = PyTuple_Pack(1, list_revs);
+   goto done;
+   }
+
+   /* Turn the python list into a native integer array (for efficiency) */
+   revs = (Py_ssize_t *)malloc((num_revs) * sizeof(Py_ssize_t));
+   if (revs == NULL) {
+   PyErr_NoMemory();
+   goto bail;
+   }
+   for (i = 0; i < num_revs; i++) {
+   Py_ssize_t revnum = PyInt_AsLong(PyList_GET_ITEM(list_revs, i));
+   if (revnum == -1 && PyErr_Occurred()) {
+   goto bail;
+   }
+   revs[i] = revnum;
+   }
+
+   /* Compute and check various property of the unsliced delta chain */
+   deltachainspan = index_segment_span(self, revs[0], revs[num_revs - 1]);
+
+   if (deltachainspan <= mingapsize) {
+   result = PyTuple_Pack(1, list_revs);
+   goto done;
+   }
+   chainpayload = 0;
+   for (i = 0; i < num_revs; i++) {
+   chainpayload += index_get_length(self, revs[i]);
+   }
+
+   readdata = deltachainspan;
+   density = 1.0;
+
+   if (0 < deltachainspan) {
+   density = (double)chainpayload / (double)deltachainspan;
+   };
+
+   if (density >= targetdensity) {
+   result = PyTuple_Pack(1, list_revs);
+   goto done;
+   }
+
+   /* if chain is too sparse, look for relevant gaps */
+   gaps = (struct Gap *)malloc((num_revs) * sizeof(struct Gap));
+   if (gaps == NULL) {
+   PyErr_NoMemory();
+   goto bail;
+   }
+
+   Py_ssize_t previous_end = -1;
+   for (i = 0; i < num_revs; i++) {
+   Py_ssize_t revstart = index_get

[PATCH 13 of 14] cext: increment the `parsers` module version

2018-11-12 Thread Boris Feld
# HG changeset patch
# User Boris Feld 
# Date 1541785099 -3600
#  Fri Nov 09 18:38:19 2018 +0100
# Node ID 0d337528d627f35f8337fc68ea18245db0a608e1
# Parent  4a1104eade1dfb1697517d60d2c5fd7a98b8c7f0
# EXP-Topic sparse-perf
# Available At https://bitbucket.org/octobus/mercurial-devel/
#  hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 
0d337528d627
cext: increment the `parsers` module version

We introduced a new `slicechunktodensity` method to the index.

diff --git a/mercurial/cext/parsers.c b/mercurial/cext/parsers.c
--- a/mercurial/cext/parsers.c
+++ b/mercurial/cext/parsers.c
@@ -713,7 +713,7 @@ void dirs_module_init(PyObject *mod);
 void manifest_module_init(PyObject *mod);
 void revlog_module_init(PyObject *mod);
 
-static const int version = 11;
+static const int version = 12;
 
 static void module_init(PyObject *mod)
 {
diff --git a/mercurial/policy.py b/mercurial/policy.py
--- a/mercurial/policy.py
+++ b/mercurial/policy.py
@@ -69,7 +69,7 @@ def _importfrom(pkgname, modname):
 (r'cext', r'bdiff'): 3,
 (r'cext', r'mpatch'): 1,
 (r'cext', r'osutil'): 4,
-(r'cext', r'parsers'): 11,
+(r'cext', r'parsers'): 12,
 }
 
 # map import request to other package or module
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


[PATCH 14 of 14] sparse-revlog: put the native implementation of slicechunktodensity to use

2018-11-12 Thread Boris Feld
# HG changeset patch
# User Boris Feld 
# Date 1541980065 -3600
#  Mon Nov 12 00:47:45 2018 +0100
# Node ID 2d6f7e64249ddfce01ad5bea9b7ae409c752801f
# Parent  0d337528d627f35f8337fc68ea18245db0a608e1
# EXP-Topic sparse-perf
# Available At https://bitbucket.org/octobus/mercurial-devel/
#  hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 
2d6f7e64249d
sparse-revlog: put the native implementation of slicechunktodensity to use

When possible, the C implementation of delta chain slicing will be used,
providing a large boost in performance for this operation.

As a practical example, consider restoring manifest revision '59547c40bc4c'
for a reference NetBeans repository (using sparse-revlog): the median time of
the `slice-sparse-chain` step of `perfrevlogrevision` improves from 0.660 ms
to 0.098 ms.

The full series moves delta chain slicing from 1.120 ms to 0.098 ms.

Implementing _slicechunktosize in C would yield further improvements.
However, the performance seems good enough for now.

diff --git a/mercurial/revlogutils/deltas.py b/mercurial/revlogutils/deltas.py
--- a/mercurial/revlogutils/deltas.py
+++ b/mercurial/revlogutils/deltas.py
@@ -115,9 +115,12 @@ def slicechunk(revlog, revs, targetsize=
 targetsize = max(targetsize, revlog._srmingapsize)
 # targetsize should not be specified when evaluating delta candidates:
 # * targetsize is used to ensure we stay within specification when reading,
-for chunk in _slicechunktodensity(revlog, revs,
-  revlog._srdensitythreshold,
-  revlog._srmingapsize):
+densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
+if densityslicing is None:
+densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
+for chunk in densityslicing(revs,
+revlog._srdensitythreshold,
+revlog._srmingapsize):
 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
 yield subchunk
 
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


[PATCH 1 of 3] help: add internals.extensions topic

2018-11-12 Thread Yuya Nishihara
# HG changeset patch
# User Yuya Nishihara 
# Date 1542025734 -32400
#  Mon Nov 12 21:28:54 2018 +0900
# Node ID 0a27ee6fb750532b14363b82f0c02ca7d37868da
# Parent  ef694e477783bb832b70ab6189eafa67b8020f00
help: add internals.extensions topic

I think it's better to include the API overview in core as we now have the
internals section in our help system.

Retrieved from the wiki, and formatted as reST. Several wiki links are
removed since they are invalid in the help. The sections about example
extensions are removed entirely as they seemed too verbose.

https://www.mercurial-scm.org/wiki/WritingExtensions

diff --git a/contrib/wix/help.wxs b/contrib/wix/help.wxs
--- a/contrib/wix/help.wxs
+++ b/contrib/wix/help.wxs
@@ -47,6 +47,7 @@
 
 
 
+
 
 
 
diff --git a/mercurial/help.py b/mercurial/help.py
--- a/mercurial/help.py
+++ b/mercurial/help.py
@@ -311,6 +311,8 @@ internalstable = sorted([
  loaddoc('changegroups', subdir='internals')),
 (['config'], _('Config Registrar'),
  loaddoc('config', subdir='internals')),
+(['extensions', 'extension'], _('Extension API'),
+ loaddoc('extensions', subdir='internals')),
 (['requirements'], _('Repository Requirements'),
  loaddoc('requirements', subdir='internals')),
 (['revlogs'], _('Revision Logs'),
diff --git a/mercurial/help/internals/extensions.txt 
b/mercurial/help/internals/extensions.txt
new file mode 100644
--- /dev/null
+++ b/mercurial/help/internals/extensions.txt
@@ -0,0 +1,321 @@
+Extensions allow the creation of new features and using them directly from
+the main hg command line as if they were built-in commands. The extensions
+have full access to the *internal* API.
+
+Use of Mercurial's internal API very likely makes your code subject to
+Mercurial's license. Before going any further, read the License page.
+
+There are NO guarantees that third-party code calling into Mercurial's
+internals won't break from release to release. If you do use Mercurial's API
+for published third-party code, we expect you to test your code before each
+major Mercurial release. This will prevent various bug reports from your users
+when they upgrade their copy of Mercurial.
+
+File Layout
+===
+
+Extensions are usually written as simple python modules. Larger ones are
+better split into multiple modules of a single package (see the convert
+extension). The package root module gives its name to the extension and
+implements the ``cmdtable`` and optional callbacks described below.
+
+Command table
+=
+
+To write your own extension, your python module can provide an optional dict
+named ``cmdtable`` with entries describing each command. A command should be
+registered to the ``cmdtable`` by ``@command`` decorator.
+
+Example using ``@command`` decorator (requires Mercurial 1.9)::
+
+from mercurial import cmdutil
+from mercurial.i18n import _
+
+cmdtable = {}
+command = cmdutil.command(cmdtable)
+
+@command('print-parents',
+[('s', 'short', None, _('print short form')),
+ ('l', 'long', None, _('print long form'))],
+_('[options] node'))
+def printparents(ui, repo, node, **opts):
+...
+
+The cmdtable dictionary
+---
+
+The ``cmdtable`` dictionary uses as key the new command names, and, as value,
+a tuple containing:
+
+1. the function to be called when the command is used.
+2. a list of options the command can take.
+3. a command line synopsis for the command (the function docstring is used for
+   the full help).
+
+List of options
+---
+
+All the command flag options are documented in the mercurial/fancyopts.py
+sources.
+
+The options list is a list of tuples containing:
+
+1. the short option letter, or ``''`` if no short option is available
+   (for example, ``o`` for a ``-o`` option).
+2. the long option name (for example, ``option`` for a ``--option`` option).
+3. a default value for the option.
+4. a help string for the option (it's possible to omit the "hg newcommand"
+   part and only the options and parameter substring is needed).
+
+Command function signatures
+---
+
+Functions that implement new commands always receive a ``ui`` and usually
+a ``repo`` parameter. The rest of parameters are taken from the command line
+items that don't start with a dash and are passed in the same order they were
+written. If no default value is given in the parameter list they are required.
+
+If there is no repo to be associated with the command and consequently no
+``repo`` passed, then ``norepo=True`` should be passed to the ``@command``
+decorator::
+
+@command('mycommand', [], norepo=True)
+def mycommand(ui, **opts):
+...
+
+For examples of ``norepo``, see the convert extension.
+
+Command function docstrings
+===
+
+The docstring of your function is used as the main help text, shown by
+``hg he

[PATCH 2 of 3] help: merge section about uisetup() and extsetup()

2018-11-12 Thread Yuya Nishihara
# HG changeset patch
# User Yuya Nishihara 
# Date 1542029184 -32400
#  Mon Nov 12 22:26:24 2018 +0900
# Node ID f1ad5eaa63c8f4ac5a9b5c7553d40cc33792a34e
# Parent  0a27ee6fb750532b14363b82f0c02ca7d37868da
help: merge section about uisetup() and extsetup()

They are technically the same callback, called only once per process.
The section name "ui setup" is confusing, so shouldn't be used.

diff --git a/mercurial/help/internals/extensions.txt 
b/mercurial/help/internals/extensions.txt
--- a/mercurial/help/internals/extensions.txt
+++ b/mercurial/help/internals/extensions.txt
@@ -151,30 +151,23 @@ before the next phase begins. In the fir
 loaded and registered with Mercurial. This means that you can find all enabled
 extensions with ``extensions.find`` in the following phases.
 
-ui setup
-
+Extension setup
+---
 
-Extensions can implement an optional callback named ``uisetup``. ``uisetup``
-is called when the extension is first loaded and receives a ui object::
+There are two callbacks to be called when extensions are loaded, named
+``uisetup`` and ``extsetup``. ``uisetup`` is called first for each extension,
+then ``extsetup`` is called. This means ``extsetup`` can be useful in case
+one extension optionally depends on another extension.
+
+Both ``uisetup`` and ``extsetup`` receive a ui object::
 
 def uisetup(ui):
 # ...
 
-Extension setup

-
-Extensions can implement an optional callback named ``extsetup``. It is
-called after all the extension are loaded, and can be useful in case one
-extension optionally depends on another extension. Signature::
-
-def extsetup():
+def extsetup(ui):
 # ...
 
-Mercurial version 8e6019b16a7d and later (that is post-1.3.1) will pass
-a ``ui``` argument to ``extsetup``::
-
-def extsetup(ui):
-# ...
+In Mercurial 1.3.1 or earlier, ``extsetup`` takes no argument.
 
 Command table setup
 ---
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


[PATCH 3 of 3] help: document weird behavior of uisetup()

2018-11-12 Thread Yuya Nishihara
# HG changeset patch
# User Yuya Nishihara 
# Date 1542030696 -32400
#  Mon Nov 12 22:51:36 2018 +0900
# Node ID bc299f37cb3f501048499b39d4fd63b6a5a32902
# Parent  f1ad5eaa63c8f4ac5a9b5c7553d40cc33792a34e
help: document weird behavior of uisetup()

While refactoring the ui.log() API, it was really annoying that uisetup() is
actually just phase 1 of extsetup(). Let's document that. I'm planning to
add another uisetup() which is called per ui instance, though I can't think
of a good name for it.

diff --git a/mercurial/help/internals/extensions.txt 
b/mercurial/help/internals/extensions.txt
--- a/mercurial/help/internals/extensions.txt
+++ b/mercurial/help/internals/extensions.txt
@@ -159,7 +159,8 @@ There are two callbacks to be called whe
 then ``extsetup`` is called. This means ``extsetup`` can be useful in case
 one extension optionally depends on another extension.
 
-Both ``uisetup`` and ``extsetup`` receive a ui object::
+Both ``uisetup`` and ``extsetup`` receive a ui object with the local
+repository configuration::
 
 def uisetup(ui):
 # ...
@@ -167,6 +168,13 @@ Both ``uisetup`` and ``extsetup`` receiv
 def extsetup(ui):
 # ...
 
+Be aware that ``uisetup`` is NOT the function to configure a ``ui`` instance.
+It's called only once per process, not per ``ui`` instance. Also, any changes
+to the ``ui`` may be discarded because the ``ui`` here temporarily loaded
+local configuration. So, it's generally wrong to do ``ui.setconfig()`` in
+these callbacks. Notable exceptions are setting ``pre/post-`` hooks
+and extending ``ui.__class__``.
+
 In Mercurial 1.3.1 or earlier, ``extsetup`` takes no argument.
 
 Command table setup
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


Re: [PATCH 12 of 14] sparse-revlog: introduce native (C) implementation of slicechunktodensity

2018-11-12 Thread Yuya Nishihara
On Mon, 12 Nov 2018 10:55:47 +0100, Boris Feld wrote:
> # HG changeset patch
> # User Boris Feld 
> # Date 1541785632 -3600
> #  Fri Nov 09 18:47:12 2018 +0100
> # Node ID 4a1104eade1dfb1697517d60d2c5fd7a98b8c7f0
> # Parent  0ea42453fa491793d1e145f5093b65e84cb65e97
> # EXP-Topic sparse-perf
> # Available At https://bitbucket.org/octobus/mercurial-devel/
> #  hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 
> 4a1104eade1d
> sparse-revlog: introduce native (C) implementation of slicechunktodensity

Just quickly scanned this patch, no other patches in this series nor "sparse"
logic is reviewed.

> +struct Gap {
> + long size;
> + long idx;
> +};
> +
> +static int gap_compare(const void *left, const void *right)
> +{
> + return ((struct Gap *)left)->size - ((struct Gap *)right)->size;
> +}
> +static int long_compare(const void *left, const void *right)
> +{
> + return *(long *)left - *(long *)right;
> +}

Nit: cast to a const pointer, e.g. (const struct Gap *), as the argument is
const void *.

If we're sure 'left - right' never exceeds the int range, it might be better
to explicitly cast the result to (int) to silence possible warning.

> +static PyObject *index_slicechunktodensity(indexObject *self, PyObject *args)
> +{
> + /* method arguments */
> + PyObject *list_revs = NULL; /* revisions in the chain */
> + double targetdensity = 0.5; /* min density to achieve */
> + long mingapsize = 0;/* threshold to ignore gaps */
> +
> + /* other core variables */
> + long i;  /* used for various iteration */
> + PyObject *result = NULL; /* the final return of the function */
> +
> + /* generic information about the delta chain being slice */
> + Py_ssize_t num_revs = 0; /* size of the full delta chain */
> + Py_ssize_t *revs = NULL; /* native array of revision in the chain */
> + long chainpayload = 0;   /* sum of all delta in the chain */
> + long deltachainspan = 0; /* distance from first byte to last byte */
> +
> + /* variable used for slicing the delta chain */
> + long readdata = 0;  /* amount of data currently planned to be read */
> + double density = 0; /* ration of payload data compared to read ones */
> + struct Gap *gaps = NULL; /* array of notable gap in the chain */
> + long num_gaps = 0; /* total number of notable gap recorded so far */
> + Py_ssize_t *selected_indices = NULL; /* indices of gap skipped over */
> + long num_selected = 0;   /* number of gaps skipped */

Maybe i, num_gaps, num_selected should be Py_ssize_t? In general, long can
be narrower than ssize_t.

> + PyObject *chunk = NULL;  /* individual slice */
> + PyObject *allchunks = PyList_New(num_selected); /* all slices */

Needs to make sure that allchunks isn't NULL.

And it's probably better to just initialize the list with literal 0, since
we have no for-loop to fill in the list items up to num_selected.

> + /* parsing argument */
> + if (!PyArg_ParseTuple(args, "O!dl", &PyList_Type, &list_revs,
> +   &targetdensity, &mingapsize)) {
> + goto bail;
> + }
> +
> + /* If the delta chain contains a single element, we do not need slicing
> +  */
> + num_revs = PyList_GET_SIZE(list_revs);
> + if (num_revs <= 1) {
> + result = PyTuple_Pack(1, list_revs);
> + goto done;
> + }
> +
> + /* Turn the python list into a native integer array (for efficiency) */
> + revs = (Py_ssize_t *)malloc((num_revs) * sizeof(Py_ssize_t));

num_revs * sizeof(...) can overflow. Using calloc() is safer if the zeroing
cost isn't significant.

> + if (revs == NULL) {
> + PyErr_NoMemory();
> + goto bail;
> + }
> + for (i = 0; i < num_revs; i++) {
> + Py_ssize_t revnum = PyInt_AsLong(PyList_GET_ITEM(list_revs, i));
> + if (revnum == -1 && PyErr_Occurred()) {
> + goto bail;
> + }
> + revs[i] = revnum;
> + }

Are we sure revnum is in valid range?

> +
> + /* Compute and check various property of the unsliced delta chain */
> + deltachainspan = index_segment_span(self, revs[0], revs[num_revs - 1]);
> +
> + if (deltachainspan <= mingapsize) {
> + result = PyTuple_Pack(1, list_revs);
> + goto done;
> + }
> + chainpayload = 0;
> + for (i = 0; i < num_revs; i++) {
> + chainpayload += index_get_length(self, revs[i]);
> + }
> +
> + readdata = deltachainspan;
> + density = 1.0;
> +
> + if (0 < deltachainspan) {
> + density = (double)chainpayload / (double)deltachainspan;
> + };
> +
> + if (density >= targetdensity) {
> + result = PyTuple_Pack(1, list_revs);
> + goto done;
> + }
> +
> + /* if chain is too sparse, look for relevant gaps */
> + gaps = (struct Gap *)malloc((num_revs) * sizeof(struct Gap));

This, too.

D5254: packaging: add Fedora 29 target

2018-11-12 Thread durin42 (Augie Fackler)
durin42 created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D5254

AFFECTED FILES
  Makefile
  contrib/packaging/Makefile
  contrib/packaging/docker/fedora28
  contrib/packaging/docker/fedora29

CHANGE DETAILS

diff --git a/contrib/packaging/docker/fedora28 
b/contrib/packaging/docker/fedora29
copy from contrib/packaging/docker/fedora28
copy to contrib/packaging/docker/fedora29
--- a/contrib/packaging/docker/fedora28
+++ b/contrib/packaging/docker/fedora29
@@ -1,4 +1,4 @@
-FROM fedora:28
+FROM fedora:29
 
 RUN groupadd -g 1000 build && \
 useradd -u 1000 -g 1000 -s /bin/bash -d /build -m build
diff --git a/contrib/packaging/Makefile b/contrib/packaging/Makefile
--- a/contrib/packaging/Makefile
+++ b/contrib/packaging/Makefile
@@ -14,7 +14,8 @@
 FEDORA_RELEASES := \
   20 \
   21 \
-  28
+  28 \
+  29
 
 CENTOS_RELEASES := \
   5 \
diff --git a/Makefile b/Makefile
--- a/Makefile
+++ b/Makefile
@@ -178,6 +178,7 @@
   docker-fedora20 \
   docker-fedora21 \
   docker-fedora28 \
+  docker-fedora29 \
   docker-ubuntu-trusty \
   docker-ubuntu-trusty-ppa \
   docker-ubuntu-xenial \
@@ -189,6 +190,7 @@
   fedora20 \
   fedora21 \
   fedora28 \
+  fedora29 \
   linux-wheels \
   linux-wheels-x86_64 \
   linux-wheels-i686 \



To: durin42, #hg-reviewers
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D5235: revlog: replace PyInt_AS_LONG with a more portable helper function

2018-11-12 Thread durin42 (Augie Fackler)
durin42 updated this revision to Diff 12502.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D5235?vs=12469&id=12502

REVISION DETAIL
  https://phab.mercurial-scm.org/D5235

AFFECTED FILES
  mercurial/cext/revlog.c
  mercurial/cext/util.h

CHANGE DETAILS

diff --git a/mercurial/cext/util.h b/mercurial/cext/util.h
--- a/mercurial/cext/util.h
+++ b/mercurial/cext/util.h
@@ -58,4 +58,17 @@
return _PyDict_NewPresized(((1 + expected_size) / 2) * 3);
 }
 
+/* Convert a PyInt or PyLong to a long. Returns false if there is an
+   error, in which case an exception will already have been set. */
+static inline bool pylong_to_long(PyObject *pylong, long *out)
+{
+   *out = PyLong_AsLong(pylong);
+   /* Fast path to avoid hitting PyErr_Occurred if the value was obviously
+* not an error. */
+   if (*out != -1) {
+   return true;
+   }
+   return PyErr_Occurred() == NULL;
+}
+
 #endif /* _HG_UTIL_H_ */
diff --git a/mercurial/cext/revlog.c b/mercurial/cext/revlog.c
--- a/mercurial/cext/revlog.c
+++ b/mercurial/cext/revlog.c
@@ -24,7 +24,6 @@
 #define PyInt_Check PyLong_Check
 #define PyInt_FromLong PyLong_FromLong
 #define PyInt_FromSsize_t PyLong_FromSsize_t
-#define PyInt_AS_LONG PyLong_AS_LONG
 #define PyInt_AsLong PyLong_AsLong
 #endif
 
@@ -126,7 +125,7 @@
errclass = PyDict_GetItemString(dict, "RevlogError");
if (errclass == NULL) {
PyErr_SetString(PyExc_SystemError,
-   "could not find RevlogError");
+   "could not find RevlogError");
goto cleanup;
}
 
@@ -146,7 +145,7 @@
if (self->inlined && pos > 0) {
if (self->offsets == NULL) {
self->offsets = PyMem_Malloc(self->raw_length *
-sizeof(*self->offsets));
+sizeof(*self->offsets));
if (self->offsets == NULL)
return (const char *)PyErr_NoMemory();
inline_scan(self, self->offsets);
@@ -161,10 +160,17 @@
 int maxrev)
 {
if (rev >= self->length) {
+   long tmp;
PyObject *tuple =
PyList_GET_ITEM(self->added, rev - self->length);
-   ps[0] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 5));
-   ps[1] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 6));
+   if (!pylong_to_long(PyTuple_GET_ITEM(tuple, 5), &tmp)) {
+   return -1;
+   }
+   ps[0] = (int)tmp;
+   if (!pylong_to_long(PyTuple_GET_ITEM(tuple, 6), &tmp)) {
+   return -1;
+   }
+   ps[1] = (int)tmp;
} else {
const char *data = index_deref(self, rev);
ps[0] = getbe32(data + 24);
@@ -249,8 +255,8 @@
c_node_id = data + 32;
 
entry = Py_BuildValue(tuple_format, offset_flags, comp_len, uncomp_len,
- base_rev, link_rev, parent_1, parent_2, c_node_id,
- 20);
+ base_rev, link_rev, parent_1, parent_2, c_node_id,
+ 20);
 
if (entry) {
PyObject_GC_UnTrack(entry);
@@ -296,7 +302,7 @@
const char *node = index_node(self, pos);
if (node == NULL) {
PyErr_Format(PyExc_IndexError, "could not access rev %d",
-(int)pos);
+(int)pos);
}
return node;
 }
@@ -464,7 +470,10 @@
if (iter == NULL)
return -2;
while ((iter_item = PyIter_Next(iter))) {
-   iter_item_long = PyInt_AS_LONG(iter_item);
+   if (!pylong_to_long(iter_item, &iter_item_long)) {
+   Py_DECREF(iter_item);
+   return -2;
+   }
Py_DECREF(iter_item);
if (iter_item_long < min_idx)
min_idx = iter_item_long;
@@ -518,8 +527,8 @@
 
/* Get arguments */
if (!PyArg_ParseTuple(args, "lO!O!O!", &minroot, &PyList_Type, &heads,
- &PyList_Type, &roots, &PyBool_Type,
- &includepatharg))
+ &PyList_Type, &roots, &PyBool_Type,
+ &includepatharg))
goto bail;
 
if (includepatharg == Py_True)
@@ -692,7 +701,7 @@
PyList_SET_ITEM(phasessetlist, i + 1, phaseset);
if (!PyList_Check(phaseroots)) {
PyErr_SetString(PyExc_TypeError,
-   "roots item must be a list");
+  

Re: [PATCH 4 of 4 accept-scripts] land: add test documenting contents of take.log

2018-11-12 Thread Augie Fackler
These scripts are now live, and the matching infra change is out for review.

> On Nov 9, 2018, at 10:46, Martin von Zweigbergk  wrote:
> 
> Sounds good to me. 
> 
> On Fri, Nov 9, 2018, 07:45 Augie Fackler   wrote:
> 
> 
> > On Nov 8, 2018, at 00:12, Martin von Zweigbergk  > > wrote:
> > 
> > 
> > 
> > On Wed, Nov 7, 2018 at 5:06 PM Augie Fackler  > > wrote:
> >> # HG changeset patch
> >> # User Augie Fackler mailto:r...@durin42.com>>
> >> # Date 1541634897 18000
> >> #  Wed Nov 07 18:54:57 2018 -0500
> >> # Node ID 3e0f9891db118868fe54315231ba52fc84501a37
> >> # Parent  69c1fd928cf6668e4ddbd6d26ebb53bb75e374c5
> >> land: add test documenting contents of take.log
> >> 
> > This series looks good to me. Thanks! I'll let Kevin queue it (because I 
> > have never done that and I'm too lazy to figure out how).
> 
> If you're okay with it, I'll push it per your review and write up the 
> associated infra change to deploy it so we start collecting data again.

___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


Re: [PATCH 1 of 3] test-narrow-clone-stream: include uppercase letter in filename

2018-11-12 Thread Pulkit Goyal
On Sat, Nov 10, 2018 at 6:26 AM Yuya Nishihara  wrote:

> # HG changeset patch
> # User Yuya Nishihara 
> # Date 1541817382 -32400
> #  Sat Nov 10 11:36:22 2018 +0900
> # Node ID 2016ce0a58b83c08f110ee52feb1dd23c4179871
> # Parent  157f0e29eaa35adc2488709f5c00f2865a4d5361
> test-narrow-clone-stream: include uppercase letter in filename
>
> This makes encoded filenames differ from the original names.
>

Queued this series. Many many thanks for following up on this.

(I was on vacation and hence couldn't reply to your related comments)
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D5138: narrow: only send includepats and excludepats if they are not empty

2018-11-12 Thread pulkit (Pulkit Goyal)
pulkit added inline comments.

INLINE COMMENTS

> martinvonz wrote in narrowcommands.py:142-143
> should these also be changed?

By "these", do you mean oldincludepats and oldexcludepats?

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D5138

To: pulkit, durin42, martinvonz, #hg-reviewers
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D5255: mergetools: adjust Beyond Compare config on Mac/Linux

2018-11-12 Thread durin42 (Augie Fackler)
durin42 created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  Set the labels of the Linux and Mac versions of Beyond Compare from
  Mercurial's builtin variables, same as the Windows version.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D5255

AFFECTED FILES
  mercurial/default.d/mergetools.rc

CHANGE DETAILS

diff --git a/mercurial/default.d/mergetools.rc 
b/mercurial/default.d/mergetools.rc
--- a/mercurial/default.d/mergetools.rc
+++ b/mercurial/default.d/mergetools.rc
@@ -101,14 +101,14 @@
 beyondcompare3.diffargs=/lro /lefttitle=$plabel1 /righttitle=$clabel /solo 
/expandall $parent $child
 
 ; Linux version of Beyond Compare
-bcompare.args=$local $other $base -mergeoutput=$output -ro -lefttitle=parent1 
-centertitle=base -righttitle=parent2 -outputtitle=merged -automerge 
-reviewconflicts -solo
+bcompare.args=$local $other $base -mergeoutput=$output -ro 
-lefttitle=$labellocal -centertitle=$labelbase -righttitle=$labelother 
-outputtitle=merged -automerge -reviewconflicts -solo
 bcompare.gui=True
 bcompare.priority=-1
 bcompare.diffargs=-lro -lefttitle=$plabel1 -righttitle=$clabel -solo 
-expandall $parent $child
 
 ; OS X version of Beyond Compare
 bcomposx.executable = /Applications/Beyond Compare.app/Contents/MacOS/bcomp
-bcomposx.args=$local $other $base -mergeoutput=$output -ro -lefttitle=parent1 
-centertitle=base -righttitle=parent2 -outputtitle=merged -automerge 
-reviewconflicts -solo
+bcomposx.args=$local $other $base -mergeoutput=$output -ro 
-lefttitle=$labellocal -centertitle=$labelbase -righttitle=$labelother 
-outputtitle=merged -automerge -reviewconflicts -solo
 bcomposx.gui=True
 bcomposx.priority=-1
 bcomposx.diffargs=-lro -lefttitle=$plabel1 -righttitle=$clabel -solo 
-expandall $parent $child



To: durin42, #hg-reviewers
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D5254: packaging: add Fedora 29 target

2018-11-12 Thread durin42 (Augie Fackler)
This revision was automatically updated to reflect the committed changes.
Closed by commit rHGd0a6b2c8f8b6: packaging: add Fedora 29 target (authored by 
durin42, committed by ).

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D5254?vs=12501&id=12504

REVISION DETAIL
  https://phab.mercurial-scm.org/D5254

AFFECTED FILES
  Makefile
  contrib/packaging/Makefile
  contrib/packaging/docker/fedora28
  contrib/packaging/docker/fedora29

CHANGE DETAILS

diff --git a/contrib/packaging/docker/fedora28 
b/contrib/packaging/docker/fedora29
copy from contrib/packaging/docker/fedora28
copy to contrib/packaging/docker/fedora29
--- a/contrib/packaging/docker/fedora28
+++ b/contrib/packaging/docker/fedora29
@@ -1,4 +1,4 @@
-FROM fedora:28
+FROM fedora:29
 
 RUN groupadd -g 1000 build && \
 useradd -u 1000 -g 1000 -s /bin/bash -d /build -m build
diff --git a/contrib/packaging/Makefile b/contrib/packaging/Makefile
--- a/contrib/packaging/Makefile
+++ b/contrib/packaging/Makefile
@@ -14,7 +14,8 @@
 FEDORA_RELEASES := \
   20 \
   21 \
-  28
+  28 \
+  29
 
 CENTOS_RELEASES := \
   5 \
diff --git a/Makefile b/Makefile
--- a/Makefile
+++ b/Makefile
@@ -178,6 +178,7 @@
   docker-fedora20 \
   docker-fedora21 \
   docker-fedora28 \
+  docker-fedora29 \
   docker-ubuntu-trusty \
   docker-ubuntu-trusty-ppa \
   docker-ubuntu-xenial \
@@ -189,6 +190,7 @@
   fedora20 \
   fedora21 \
   fedora28 \
+  fedora29 \
   linux-wheels \
   linux-wheels-x86_64 \
   linux-wheels-i686 \



To: durin42, #hg-reviewers, pulkit
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


Re: [PATCH 14 of 14] sparse-revlog: put the native implementation of slicechunktodensity to use

2018-11-12 Thread Augie Fackler


> On Nov 12, 2018, at 04:55, Boris Feld  wrote:
> 
> # HG changeset patch
> # User Boris Feld 
> # Date 1541980065 -3600
> #  Mon Nov 12 00:47:45 2018 +0100
> # Node ID 2d6f7e64249ddfce01ad5bea9b7ae409c752801f
> # Parent  0d337528d627f35f8337fc68ea18245db0a608e1
> # EXP-Topic sparse-perf
> # Available At https://bitbucket.org/octobus/mercurial-devel/
> #  hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 
> 2d6f7e64249d
> sparse-revlog: put the native implementation of slicechunktodensity to use

I didn't review closely, but if we're expanding the functionality of revlog.c 
it might make sense to try and fuzz it. parsers.so is already fuzzed for the 
manifest fuzzer, so in theory that could be a guide for how to fuzz other parts 
of parsers.so.

(Not a requirement, but definitely something worth exploring.)

> 
> When possible, the C implementation of delta chain slicing will be used,
> providing a large boost in performance for this operation.
> 
> Take a practical example: restoring manifest revision '59547c40bc4c' for
> a reference NetBeans repository (using sparse-revlog). The median time of the
> step `slice-sparse-chain` of `perfrevlogrevision` improves from 0.660 ms to
> 0.098 ms.
> 
> The full series moves delta chain slicing from 1.120 ms to 0.098 ms.
> 
> Implementing _slicechunktosize into C would yield further improvements.
> However, the performance seems good enough for now.
> 
> diff --git a/mercurial/revlogutils/deltas.py b/mercurial/revlogutils/deltas.py
> --- a/mercurial/revlogutils/deltas.py
> +++ b/mercurial/revlogutils/deltas.py
> @@ -115,9 +115,12 @@ def slicechunk(revlog, revs, targetsize=
> targetsize = max(targetsize, revlog._srmingapsize)
> # targetsize should not be specified when evaluating delta candidates:
> # * targetsize is used to ensure we stay within specification when 
> reading,
> -for chunk in _slicechunktodensity(revlog, revs,
> -  revlog._srdensitythreshold,
> -  revlog._srmingapsize):
> +densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
> +if densityslicing is None:
> +densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, 
> z)
> +for chunk in densityslicing(revs,
> +revlog._srdensitythreshold,
> +revlog._srmingapsize):
> for subchunk in _slicechunktosize(revlog, chunk, targetsize):
> yield subchunk
> 
> ___
> Mercurial-devel mailing list
> Mercurial-devel@mercurial-scm.org
> https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel

___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


mercurial@40546: 3 new changesets (3 on stable)

2018-11-12 Thread Mercurial Commits
3 new changesets (3 on stable) in mercurial:

https://www.mercurial-scm.org/repo/hg/rev/5b530d767e67
changeset:   40544:5b530d767e67
branch:  stable
parent:  40473:8239d4e5d05f
user:Matt Harbison 
date:Thu Nov 08 20:04:07 2018 -0500
summary: help: unjumble the list of default config values for 
`internals.config`

https://www.mercurial-scm.org/repo/hg/rev/6107d4549fcc
changeset:   40545:6107d4549fcc
branch:  stable
user:Gregory Szorc 
date:Fri Nov 09 23:49:39 2018 +
summary: hgweb: cast bytearray to bytes

https://www.mercurial-scm.org/repo/hg/rev/7e2c58b08e74
changeset:   40546:7e2c58b08e74
branch:  stable
tag: tip
user:Matt Harbison 
date:Sat Nov 10 22:25:12 2018 -0500
summary: phabricator: ensure the command summaries are available in 
extension help

-- 
Repository URL: https://www.mercurial-scm.org/repo/hg
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


Re: [PATCH] test-subrepo: document why share magically works even if subrepos are missing

2018-11-12 Thread Augie Fackler
queued, thanks

> On Nov 9, 2018, at 22:13, Yuya Nishihara  wrote:
> 
> # HG changeset patch
> # User Yuya Nishihara 
> # Date 1541818881 -32400
> #  Sat Nov 10 12:01:21 2018 +0900
> # Node ID ecc1380d217c65025da4b25ec6792ce3a0da0209
> # Parent  96eb29b6c0edd5a3483b44a8f8f02eee2a65be70
> test-subrepo: document why share magically works even if subrepos are missing
> 
> I was confused how it's working while reviewing fb490d798be0, "share: reload
> repo after adjusting it in postshare()."
> 
> diff --git a/tests/test-subrepo-recursion.t b/tests/test-subrepo-recursion.t
> --- a/tests/test-subrepo-recursion.t
> +++ b/tests/test-subrepo-recursion.t
> @@ -559,8 +559,9 @@ The newly cloned subrepos contain no wor
>   commit: (clean)
>   update: 4 new changesets (update)
> 
> -Sharing a local repo without the locally referenced subrepo (i.e. it was 
> never
> -updated from null) works, but clone fails.
> +Sharing a local repo with missing local subrepos (i.e. it was never updated
> +from null) works because the default path is copied from the source repo,
> +whereas clone should fail.
> 
>   $ hg --config progress.disable=True clone -U ../empty ../empty2
> 
> ___
> Mercurial-devel mailing list
> Mercurial-devel@mercurial-scm.org
> https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel

___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D5256: manifest: make sure there's a filename before bothering to look for newline

2018-11-12 Thread durin42 (Augie Fackler)
durin42 created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  There's no valid manifest that would have no characters before the NUL byte on
  a line, and this fixes some erratic timeouts in the fuzzer.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D5256

AFFECTED FILES
  mercurial/cext/manifest.c
  tests/test-manifest.py

CHANGE DETAILS

diff --git a/tests/test-manifest.py b/tests/test-manifest.py
--- a/tests/test-manifest.py
+++ b/tests/test-manifest.py
@@ -4,6 +4,7 @@
 import itertools
 import silenttestrunner
 import unittest
+import zlib
 
 from mercurial import (
 manifest as manifestmod,
@@ -397,6 +398,29 @@
 def parsemanifest(self, text):
 return manifestmod.manifestdict(text)
 
+def testObviouslyBogusManifest(self):
+# This is a 163k manifest that came from oss-fuzz. It was a
+# timeout there, but when run normally it doesn't seem to
+# present any particular slowness.
+data = zlib.decompress(
+'x\x9c\xed\xce;\n\x83\x00\x10\x04\xd0\x8deNa\x93~\xf1\x03\xc9q\xf4'
+'\x14\xeaU\xbdB\xda\xd4\xe6Cj\xc1FA\xde+\x86\xe9f\xa2\xfci\xbb\xfb'
+'\xa3\xef\xea\xba\xca\x7fk\x86q\x9a\xc6\xc8\xcc&\xb3\xcf\xf8\xb8|#'
+'\x8a9\x00\xd8\xe6v\xf4\x01N\xe1\n\x00\x00\x00\x00\x00\x00\x00\x00'
+'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
+'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
+'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
+'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
+'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
+'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
+'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
+'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
+'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
+'\x00\x00\xc0\x8aey\x1d}\x01\xd8\xe0\xb9\xf3\xde\x1b\xcf\x17'
+'\xac\xbe')
+with self.assertRaises(ValueError):
+m = self.parsemanifest(data)
+
 class testtreemanifest(unittest.TestCase, basemanifesttests):
 def parsemanifest(self, text):
 return manifestmod.treemanifest(b'', text)
diff --git a/mercurial/cext/manifest.c b/mercurial/cext/manifest.c
--- a/mercurial/cext/manifest.c
+++ b/mercurial/cext/manifest.c
@@ -38,6 +38,7 @@
 #define MANIFEST_OOM -1
 #define MANIFEST_NOT_SORTED -2
 #define MANIFEST_MALFORMED -3
+#define MANIFEST_BOGUS_FILENAME -4
 
 /* get the length of the path for a line */
 static size_t pathlen(line *l)
@@ -115,7 +116,13 @@
char *prev = NULL;
while (len > 0) {
line *l;
-   char *next = memchr(data, '\n', len);
+   char *next;
+   if (*data == '\0') {
+   /* It's implausible there's no filename, don't
+* even bother looking for the newline. */
+   return MANIFEST_BOGUS_FILENAME;
+   }
+   next = memchr(data, '\n', len);
if (!next) {
return MANIFEST_MALFORMED;
}
@@ -190,6 +197,11 @@
PyErr_Format(PyExc_ValueError,
 "Manifest did not end in a newline.");
break;
+   case MANIFEST_BOGUS_FILENAME:
+   PyErr_Format(
+   PyExc_ValueError,
+   "Manifest had an entry with a zero-length filename.");
+   break;
default:
PyErr_Format(PyExc_ValueError,
 "Unknown problem parsing manifest.");



To: durin42, #hg-reviewers
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D5257: manifest: also reject obviously-too-short lines when parsing lines

2018-11-12 Thread durin42 (Augie Fackler)
durin42 created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D5257

AFFECTED FILES
  mercurial/cext/manifest.c

CHANGE DETAILS

diff --git a/mercurial/cext/manifest.c b/mercurial/cext/manifest.c
--- a/mercurial/cext/manifest.c
+++ b/mercurial/cext/manifest.c
@@ -39,6 +39,7 @@
 #define MANIFEST_NOT_SORTED -2
 #define MANIFEST_MALFORMED -3
 #define MANIFEST_BOGUS_FILENAME -4
+#define MANIFEST_TOO_SHORT_LINE -5
 
 /* get the length of the path for a line */
 static size_t pathlen(line *l)
@@ -126,6 +127,15 @@
if (!next) {
return MANIFEST_MALFORMED;
}
+   if ((next - data) < 22) {
+   /* We should have at least 22 bytes in a line:
+  1 byte filename
+  1 NUL
+  20 bytes of hash
+  so we can give up here.
+   */
+   return MANIFEST_TOO_SHORT_LINE;
+   }
next++; /* advance past newline */
if (!realloc_if_full(self)) {
return MANIFEST_OOM; /* no memory */
@@ -202,6 +212,11 @@
PyExc_ValueError,
"Manifest had an entry with a zero-length filename.");
break;
+   case MANIFEST_TOO_SHORT_LINE:
+   PyErr_Format(
+   PyExc_ValueError,
+   "Manifest had implausibly-short line.");
+   break;
default:
PyErr_Format(PyExc_ValueError,
 "Unknown problem parsing manifest.");



To: durin42, #hg-reviewers
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D5258: manifest: perform cheap checks before potentially allocating memory

2018-11-12 Thread durin42 (Augie Fackler)
durin42 created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D5258

AFFECTED FILES
  mercurial/cext/manifest.c

CHANGE DETAILS

diff --git a/mercurial/cext/manifest.c b/mercurial/cext/manifest.c
--- a/mercurial/cext/manifest.c
+++ b/mercurial/cext/manifest.c
@@ -137,13 +137,13 @@
return MANIFEST_TOO_SHORT_LINE;
}
next++; /* advance past newline */
-   if (!realloc_if_full(self)) {
-   return MANIFEST_OOM; /* no memory */
-   }
if (prev && strcmp(prev, data) > -1) {
/* This data isn't sorted, so we have to abort. */
return MANIFEST_NOT_SORTED;
}
+   if (!realloc_if_full(self)) {
+   return MANIFEST_OOM; /* no memory */
+   }
l = self->lines + ((self->numlines)++);
l->start = data;
l->len = next - data;



To: durin42, #hg-reviewers
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D5256: manifest: make sure there's a filename before bothering to look for newline

2018-11-12 Thread durin42 (Augie Fackler)
durin42 updated this revision to Diff 12508.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D5256?vs=12505&id=12508

REVISION DETAIL
  https://phab.mercurial-scm.org/D5256

AFFECTED FILES
  mercurial/cext/manifest.c
  tests/test-manifest.py

CHANGE DETAILS

diff --git a/tests/test-manifest.py b/tests/test-manifest.py
--- a/tests/test-manifest.py
+++ b/tests/test-manifest.py
@@ -4,6 +4,7 @@
 import itertools
 import silenttestrunner
 import unittest
+import zlib
 
 from mercurial import (
 manifest as manifestmod,
@@ -397,6 +398,29 @@
 def parsemanifest(self, text):
 return manifestmod.manifestdict(text)
 
+def testObviouslyBogusManifest(self):
+# This is a 163k manifest that came from oss-fuzz. It was a
+# timeout there, but when run normally it doesn't seem to
+# present any particular slowness.
+data = zlib.decompress(
+'x\x9c\xed\xce;\n\x83\x00\x10\x04\xd0\x8deNa\x93~\xf1\x03\xc9q\xf4'
+'\x14\xeaU\xbdB\xda\xd4\xe6Cj\xc1FA\xde+\x86\xe9f\xa2\xfci\xbb\xfb'
+'\xa3\xef\xea\xba\xca\x7fk\x86q\x9a\xc6\xc8\xcc&\xb3\xcf\xf8\xb8|#'
+'\x8a9\x00\xd8\xe6v\xf4\x01N\xe1\n\x00\x00\x00\x00\x00\x00\x00\x00'
+'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
+'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
+'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
+'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
+'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
+'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
+'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
+'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
+'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
+'\x00\x00\xc0\x8aey\x1d}\x01\xd8\xe0\xb9\xf3\xde\x1b\xcf\x17'
+'\xac\xbe')
+with self.assertRaises(ValueError):
+self.parsemanifest(data)
+
 class testtreemanifest(unittest.TestCase, basemanifesttests):
 def parsemanifest(self, text):
 return manifestmod.treemanifest(b'', text)
diff --git a/mercurial/cext/manifest.c b/mercurial/cext/manifest.c
--- a/mercurial/cext/manifest.c
+++ b/mercurial/cext/manifest.c
@@ -38,6 +38,7 @@
 #define MANIFEST_OOM -1
 #define MANIFEST_NOT_SORTED -2
 #define MANIFEST_MALFORMED -3
+#define MANIFEST_BOGUS_FILENAME -4
 
 /* get the length of the path for a line */
 static size_t pathlen(line *l)
@@ -115,7 +116,13 @@
char *prev = NULL;
while (len > 0) {
line *l;
-   char *next = memchr(data, '\n', len);
+   char *next;
+   if (*data == '\0') {
+   /* It's implausible there's no filename, don't
+* even bother looking for the newline. */
+   return MANIFEST_BOGUS_FILENAME;
+   }
+   next = memchr(data, '\n', len);
if (!next) {
return MANIFEST_MALFORMED;
}
@@ -190,6 +197,11 @@
PyErr_Format(PyExc_ValueError,
 "Manifest did not end in a newline.");
break;
+   case MANIFEST_BOGUS_FILENAME:
+   PyErr_Format(
+   PyExc_ValueError,
+   "Manifest had an entry with a zero-length filename.");
+   break;
default:
PyErr_Format(PyExc_ValueError,
 "Unknown problem parsing manifest.");



To: durin42, #hg-reviewers
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D4899: narrow: filter files by narrowspec in ctx.matches()

2018-11-12 Thread martinvonz (Martin von Zweigbergk)
martinvonz added a comment.


  > This has no effect yet [...] for the working copy, the filtering is also done in the overridden dirstate.walk() in narrowdirstate.
  
  For the record, that turned out to be a (small) lie: dirstate.matches()
  doesn't call dirstate.walk(), so the override of dirstate.walk() had no effect
  here. A consequence is that we'll now filter out paths in the dirstate that are
  outside the narrowspec. That doesn't normally happen, but it can happen e.g.
  with `hg share` (that's how I noticed). I think the change is for the better,
  but I thought I should let you know that the commit message was incorrect.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D4899

To: martinvonz, #hg-reviewers
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel