[pypy-commit] pypy default: prepare listobject and timsort for liststrategies that will be merged later

l . diekmann Thu, 08 Sep 2011 05:52:22 -0700

Author: Lukas Diekmann <lukas.diekm...@uni-duesseldorf.de>
Branch: 
Changeset: r47161:1d6e48b23004
Date: 2011-09-08 14:48 +0200
http://bitbucket.org/pypy/pypy/changeset/1d6e48b23004/


Log:    prepare listobject and timsort for liststrategies that will be
        merged later

diff --git a/pypy/objspace/std/listobject.py b/pypy/objspace/std/listobject.py
--- a/pypy/objspace/std/listobject.py
+++ b/pypy/objspace/std/listobject.py
@@ -8,7 +8,7 @@
 
 from pypy.objspace.std import slicetype
 from pypy.interpreter import gateway, baseobjspace
-from pypy.rlib.listsort import TimSort
+from pypy.rlib.listsort import make_timsort_class
 from pypy.interpreter.argument import Signature
 
 class W_ListObject(W_Object):
@@ -445,6 +445,7 @@
         self.w_key = w_key
         self.w_item = w_item
 
+TimSort = make_timsort_class()
 # NOTE: all the subclasses of TimSort should inherit from a common subclass,
 #       so make sure that only SimpleSort inherits directly from TimSort.
 #       This is necessary to hide the parent method TimSort.lt() from the
diff --git a/pypy/rlib/listsort.py b/pypy/rlib/listsort.py
--- a/pypy/rlib/listsort.py
+++ b/pypy/rlib/listsort.py
@@ -7,588 +7,589 @@
 ## ------------------------------------------------------------------------
 ##         Adapted from CPython, original code and algorithms by Tim Peters
 
-## CAREFUL:
-## this class has to be used carefully, because all the lists that are
-## sorted will be unified
+def make_timsort_class():
 
-class TimSort:
-    """TimSort(list).sort()
+    class TimSort:
+        """TimSort(list).sort()
 
-    Sorts the list in-place, using the overridable method lt() for comparison.
-    """
+        Sorts the list in-place, using the overridable method lt() for 
comparison.
+        """
 
-    def __init__(self, list, listlength=None):
-        self.list = list
-        if listlength is None:
-            listlength = len(list)
-        self.listlength = listlength
+        def __init__(self, list, listlength=None):
+            self.list = list
+            if listlength is None:
+                listlength = len(list)
+            self.listlength = listlength
 
-    def lt(self, a, b):
-        return a < b
+        def lt(self, a, b):
+            return a < b
 
-    def le(self, a, b):
-        return not self.lt(b, a)   # always use self.lt() as the primitive
+        def le(self, a, b):
+            return not self.lt(b, a)   # always use self.lt() as the primitive
 
-    # binarysort is the best method for sorting small arrays: it does
-    # few compares, but can do data movement quadratic in the number of
-    # elements.
-    # "a" is a contiguous slice of a list, and is sorted via binary insertion.
-    # This sort is stable.
-    # On entry, the first "sorted" elements are already sorted.
-    # Even in case of error, the output slice will be some permutation of
-    # the input (nothing is lost or duplicated).
+        # binarysort is the best method for sorting small arrays: it does
+        # few compares, but can do data movement quadratic in the number of
+        # elements.
+        # "a" is a contiguous slice of a list, and is sorted via binary 
insertion.
+        # This sort is stable.
+        # On entry, the first "sorted" elements are already sorted.
+        # Even in case of error, the output slice will be some permutation of
+        # the input (nothing is lost or duplicated).
 
-    def binarysort(self, a, sorted=1):
-        for start in xrange(a.base + sorted, a.base + a.len):
-            # set l to where list[start] belongs
-            l = a.base
-            r = start
-            pivot = a.list[r]
-            # Invariants:
-            # pivot >= all in [base, l).
-            # pivot  < all in [r, start).
-            # The second is vacuously true at the start.
-            while l < r:
-                p = l + ((r - l) >> 1)
-                if self.lt(pivot, a.list[p]):
-                    r = p
+        def binarysort(self, a, sorted=1):
+            for start in xrange(a.base + sorted, a.base + a.len):
+                # set l to where list[start] belongs
+                l = a.base
+                r = start
+                pivot = a.list[r]
+                # Invariants:
+                # pivot >= all in [base, l).
+                # pivot  < all in [r, start).
+                # The second is vacuously true at the start.
+                while l < r:
+                    p = l + ((r - l) >> 1)
+                    if self.lt(pivot, a.list[p]):
+                        r = p
+                    else:
+                        l = p+1
+                assert l == r
+                # The invariants still hold, so pivot >= all in [base, l) and
+                # pivot < all in [l, start), so pivot belongs at l.  Note
+                # that if there are elements equal to pivot, l points to the
+                # first slot after them -- that's why this sort is stable.
+                # Slide over to make room.
+                for p in xrange(start, l, -1):
+                    a.list[p] = a.list[p-1]
+                a.list[l] = pivot
+
+        # Compute the length of the run in the slice "a".
+        # "A run" is the longest ascending sequence, with
+        #
+        #     a[0] <= a[1] <= a[2] <= ...
+        #
+        # or the longest descending sequence, with
+        #
+        #     a[0] > a[1] > a[2] > ...
+        #
+        # Return (run, descending) where descending is False in the former 
case,
+        # or True in the latter.
+        # For its intended use in a stable mergesort, the strictness of the 
defn of
+        # "descending" is needed so that the caller can safely reverse a 
descending
+        # sequence without violating stability (strict > ensures there are no 
equal
+        # elements to get out of order).
+
+        def count_run(self, a):
+            if a.len <= 1:
+                n = a.len
+                descending = False
+            else:
+                n = 2
+                if self.lt(a.list[a.base + 1], a.list[a.base]):
+                    descending = True
+                    for p in xrange(a.base + 2, a.base + a.len):
+                        if self.lt(a.list[p], a.list[p-1]):
+                            n += 1
+                        else:
+                            break
                 else:
-                    l = p+1
-            assert l == r
-            # The invariants still hold, so pivot >= all in [base, l) and
-            # pivot < all in [l, start), so pivot belongs at l.  Note
-            # that if there are elements equal to pivot, l points to the
-            # first slot after them -- that's why this sort is stable.
-            # Slide over to make room.
-            for p in xrange(start, l, -1):
-                a.list[p] = a.list[p-1]
-            a.list[l] = pivot
+                    descending = False
+                    for p in xrange(a.base + 2, a.base + a.len):
+                        if self.lt(a.list[p], a.list[p-1]):
+                            break
+                        else:
+                            n += 1
+            return ListSlice(a.list, a.base, n), descending
 
-    # Compute the length of the run in the slice "a".
-    # "A run" is the longest ascending sequence, with
-    #
-    #     a[0] <= a[1] <= a[2] <= ...
-    #
-    # or the longest descending sequence, with
-    #
-    #     a[0] > a[1] > a[2] > ...
-    #
-    # Return (run, descending) where descending is False in the former case,
-    # or True in the latter.
-    # For its intended use in a stable mergesort, the strictness of the defn of
-    # "descending" is needed so that the caller can safely reverse a descending
-    # sequence without violating stability (strict > ensures there are no equal
-    # elements to get out of order).
+        # Locate the proper position of key in a sorted vector; if the vector
+        # contains an element equal to key, return the position immediately to 
the
+        # left of the leftmost equal element -- or to the right of the 
rightmost
+        # equal element if the flag "rightmost" is set.
+        #
+        # "hint" is an index at which to begin the search, 0 <= hint < a.len.
+        # The closer hint is to the final result, the faster this runs.
+        #
+        # The return value is the index 0 <= k <= a.len such that
+        #
+        #     a[k-1] < key <= a[k]      (if rightmost is False)
+        #     a[k-1] <= key < a[k]      (if rightmost is True)
+        #
+        # as long as the indices are in bound.  IOW, key belongs at index k;
+        # or, IOW, the first k elements of a should precede key, and the last
+        # n-k should follow key.
 
-    def count_run(self, a):
-        if a.len <= 1:
-            n = a.len
-            descending = False
-        else:
-            n = 2
-            if self.lt(a.list[a.base + 1], a.list[a.base]):
-                descending = True
-                for p in xrange(a.base + 2, a.base + a.len):
-                    if self.lt(a.list[p], a.list[p-1]):
-                        n += 1
-                    else:
+        def gallop(self, key, a, hint, rightmost):
+            assert 0 <= hint < a.len
+            if rightmost:
+                lower = self.le   # search for the largest k for which a[k] <= 
key
+            else:
+                lower = self.lt   # search for the largest k for which a[k] < 
key
+
+            p = a.base + hint
+            lastofs = 0
+            ofs = 1
+            if lower(a.list[p], key):
+                # a[hint] < key -- gallop right, until
+                #     a[hint + lastofs] < key <= a[hint + ofs]
+
+                maxofs = a.len - hint     # a[a.len-1] is highest
+                while ofs < maxofs:
+                    if lower(a.list[p + ofs], key):
+                        lastofs = ofs
+                        try:
+                            ofs = ovfcheck_lshift(ofs, 1)
+                        except OverflowError:
+                            ofs = maxofs
+                        else:
+                            ofs = ofs + 1
+                    else:  # key <= a[hint + ofs]
                         break
+
+                if ofs > maxofs:
+                    ofs = maxofs
+                # Translate back to offsets relative to a.
+                lastofs += hint
+                ofs += hint
+
             else:
-                descending = False
-                for p in xrange(a.base + 2, a.base + a.len):
-                    if self.lt(a.list[p], a.list[p-1]):
+                # key <= a[hint] -- gallop left, until
+                #     a[hint - ofs] < key <= a[hint - lastofs]
+                maxofs = hint + 1   # a[0] is lowest
+                while ofs < maxofs:
+                    if lower(a.list[p - ofs], key):
                         break
                     else:
-                        n += 1
-        return ListSlice(a.list, a.base, n), descending
+                        # key <= a[hint - ofs]
+                        lastofs = ofs
+                        try:
+                            ofs = ovfcheck_lshift(ofs, 1)
+                        except OverflowError:
+                            ofs = maxofs
+                        else:
+                            ofs = ofs + 1
+                if ofs > maxofs:
+                    ofs = maxofs
+                # Translate back to positive offsets relative to a.
+                lastofs, ofs = hint-ofs, hint-lastofs
 
-    # Locate the proper position of key in a sorted vector; if the vector
-    # contains an element equal to key, return the position immediately to the
-    # left of the leftmost equal element -- or to the right of the rightmost
-    # equal element if the flag "rightmost" is set.
-    #
-    # "hint" is an index at which to begin the search, 0 <= hint < a.len.
-    # The closer hint is to the final result, the faster this runs.
-    #
-    # The return value is the index 0 <= k <= a.len such that
-    #
-    #     a[k-1] < key <= a[k]      (if rightmost is False)
-    #     a[k-1] <= key < a[k]      (if rightmost is True)
-    #
-    # as long as the indices are in bound.  IOW, key belongs at index k;
-    # or, IOW, the first k elements of a should precede key, and the last
-    # n-k should follow key.
+            assert -1 <= lastofs < ofs <= a.len
 
-    def gallop(self, key, a, hint, rightmost):
-        assert 0 <= hint < a.len
-        if rightmost:
-            lower = self.le   # search for the largest k for which a[k] <= key
-        else:
-            lower = self.lt   # search for the largest k for which a[k] < key
+            # Now a[lastofs] < key <= a[ofs], so key belongs somewhere to the
+            # right of lastofs but no farther right than ofs.  Do a binary
+            # search, with invariant a[lastofs-1] < key <= a[ofs].
 
-        p = a.base + hint
-        lastofs = 0
-        ofs = 1
-        if lower(a.list[p], key):
-            # a[hint] < key -- gallop right, until
-            #     a[hint + lastofs] < key <= a[hint + ofs]
+            lastofs += 1
+            while lastofs < ofs:
+                m = lastofs + ((ofs - lastofs) >> 1)
+                if lower(a.list[a.base + m], key):
+                    lastofs = m+1   # a[m] < key
+                else:
+                    ofs = m         # key <= a[m]
 
-            maxofs = a.len - hint     # a[a.len-1] is highest
-            while ofs < maxofs:
-                if lower(a.list[p + ofs], key):
-                    lastofs = ofs
-                    try:
-                        ofs = ovfcheck_lshift(ofs, 1)
-                    except OverflowError:
-                        ofs = maxofs
-                    else:
-                        ofs = ofs + 1
-                else:  # key <= a[hint + ofs]
-                    break
+            assert lastofs == ofs         # so a[ofs-1] < key <= a[ofs]
+            return ofs
 
-            if ofs > maxofs:
-                ofs = maxofs
-            # Translate back to offsets relative to a.
-            lastofs += hint
-            ofs += hint
+        # hint for the annotator: the argument 'rightmost' is always passed in 
as
+        # a constant (either True or False), so we can specialize the function 
for
+        # the two cases.  (This is actually needed for technical reasons: the
+        # variable 'lower' must contain a known method, which is the case in 
each
+        # specialized version but not in the unspecialized one.)
+        gallop._annspecialcase_ = "specialize:arg(4)"
 
-        else:
-            # key <= a[hint] -- gallop left, until
-            #     a[hint - ofs] < key <= a[hint - lastofs]
-            maxofs = hint + 1   # a[0] is lowest
-            while ofs < maxofs:
-                if lower(a.list[p - ofs], key):
-                    break
-                else:
-                    # key <= a[hint - ofs]
-                    lastofs = ofs
-                    try:
-                        ofs = ovfcheck_lshift(ofs, 1)
-                    except OverflowError:
-                        ofs = maxofs
-                    else:
-                        ofs = ofs + 1
-            if ofs > maxofs:
-                ofs = maxofs
-            # Translate back to positive offsets relative to a.
-            lastofs, ofs = hint-ofs, hint-lastofs
+        # ____________________________________________________________
 
-        assert -1 <= lastofs < ofs <= a.len
+        # When we get into galloping mode, we stay there until both runs win 
less
+        # often than MIN_GALLOP consecutive times.  See listsort.txt for more 
info.
+        MIN_GALLOP = 7
 
-        # Now a[lastofs] < key <= a[ofs], so key belongs somewhere to the
-        # right of lastofs but no farther right than ofs.  Do a binary
-        # search, with invariant a[lastofs-1] < key <= a[ofs].
-        
-        lastofs += 1
-        while lastofs < ofs:
-            m = lastofs + ((ofs - lastofs) >> 1)
-            if lower(a.list[a.base + m], key):
-                lastofs = m+1   # a[m] < key
-            else:
-                ofs = m         # key <= a[m]
+        def merge_init(self):
+            # This controls when we get *into* galloping mode.  It's 
initialized
+            # to MIN_GALLOP.  merge_lo and merge_hi tend to nudge it higher for
+            # random data, and lower for highly structured data.
+            self.min_gallop = self.MIN_GALLOP
 
-        assert lastofs == ofs         # so a[ofs-1] < key <= a[ofs]
-        return ofs
+            # A stack of n pending runs yet to be merged.  Run #i starts at
+            # address pending[i].base and extends for pending[i].len elements.
+            # It's always true (so long as the indices are in bounds) that
+            #
+            #     pending[i].base + pending[i].len == pending[i+1].base
+            #
+            # so we could cut the storage for this, but it's a minor amount,
+            # and keeping all the info explicit simplifies the code.
+            self.pending = []
 
-    # hint for the annotator: the argument 'rightmost' is always passed in as
-    # a constant (either True or False), so we can specialize the function for
-    # the two cases.  (This is actually needed for technical reasons: the
-    # variable 'lower' must contain a known method, which is the case in each
-    # specialized version but not in the unspecialized one.)
-    gallop._annspecialcase_ = "specialize:arg(4)"
+        # Merge the slice "a" with the slice "b" in a stable way, in-place.
+        # a.len and b.len must be > 0, and a.base + a.len == b.base.
+        # Must also have that b.list[b.base] < a.list[a.base], that
+        # a.list[a.base+a.len-1] belongs at the end of the merge, and should 
have
+        # a.len <= b.len.  See listsort.txt for more info.
 
-    # ____________________________________________________________
+        def merge_lo(self, a, b):
+            assert a.len > 0 and b.len > 0 and a.base + a.len == b.base
+            min_gallop = self.min_gallop
+            dest = a.base
+            a = a.copyitems()
 
-    # When we get into galloping mode, we stay there until both runs win less
-    # often than MIN_GALLOP consecutive times.  See listsort.txt for more info.
-    MIN_GALLOP = 7
+            # Invariant: elements in "a" are waiting to be reinserted into the 
list
+            # at "dest".  They should be merged with the elements of "b".
+            # b.base == dest + a.len.
+            # We use a finally block to ensure that the elements remaining in
+            # the copy "a" are reinserted back into self.list in all cases.
+            try:
+                self.list[dest] = b.popleft()
+                dest += 1
+                if a.len == 1 or b.len == 0:
+                    return
 
-    def merge_init(self):
-        # This controls when we get *into* galloping mode.  It's initialized
-        # to MIN_GALLOP.  merge_lo and merge_hi tend to nudge it higher for
-        # random data, and lower for highly structured data.
-        self.min_gallop = self.MIN_GALLOP
+                while True:
+                    acount = 0   # number of times A won in a row
+                    bcount = 0   # number of times B won in a row
 
-        # A stack of n pending runs yet to be merged.  Run #i starts at
-        # address pending[i].base and extends for pending[i].len elements.
-        # It's always true (so long as the indices are in bounds) that
-        #
-        #     pending[i].base + pending[i].len == pending[i+1].base
-        #
-        # so we could cut the storage for this, but it's a minor amount,
-        # and keeping all the info explicit simplifies the code.
-        self.pending = []
+                    # Do the straightforward thing until (if ever) one run
+                    # appears to win consistently.
+                    while True:
+                        if self.lt(b.list[b.base], a.list[a.base]):
+                            self.list[dest] = b.popleft()
+                            dest += 1
+                            if b.len == 0:
+                                return
+                            bcount += 1
+                            acount = 0
+                            if bcount >= min_gallop:
+                                break
+                        else:
+                            self.list[dest] = a.popleft()
+                            dest += 1
+                            if a.len == 1:
+                                return
+                            acount += 1
+                            bcount = 0
+                            if acount >= min_gallop:
+                                break
 
-    # Merge the slice "a" with the slice "b" in a stable way, in-place.
-    # a.len and b.len must be > 0, and a.base + a.len == b.base.
-    # Must also have that b.list[b.base] < a.list[a.base], that
-    # a.list[a.base+a.len-1] belongs at the end of the merge, and should have
-    # a.len <= b.len.  See listsort.txt for more info.
+                    # One run is winning so consistently that galloping may
+                    # be a huge win.  So try that, and continue galloping until
+                    # (if ever) neither run appears to be winning consistently
+                    # anymore.
+                    min_gallop += 1
 
-    def merge_lo(self, a, b):
-        assert a.len > 0 and b.len > 0 and a.base + a.len == b.base
-        min_gallop = self.min_gallop
-        dest = a.base
-        a = a.copyitems()
+                    while True:
+                        min_gallop -= min_gallop > 1
+                        self.min_gallop = min_gallop
 
-        # Invariant: elements in "a" are waiting to be reinserted into the list
-        # at "dest".  They should be merged with the elements of "b".
-        # b.base == dest + a.len.
-        # We use a finally block to ensure that the elements remaining in
-        # the copy "a" are reinserted back into self.list in all cases.
-        try:
-            self.list[dest] = b.popleft()
-            dest += 1
-            if a.len == 1 or b.len == 0:
-                return
+                        acount = self.gallop(b.list[b.base], a, hint=0,
+                                             rightmost=True)
+                        for p in xrange(a.base, a.base + acount):
+                            self.list[dest] = a.list[p]
+                            dest += 1
+                        a.advance(acount)
+                        # a.len==0 is impossible now if the comparison
+                        # function is consistent, but we can't assume
+                        # that it is.
+                        if a.len <= 1:
+                            return
 
-            while True:
-                acount = 0   # number of times A won in a row
-                bcount = 0   # number of times B won in a row
-
-                # Do the straightforward thing until (if ever) one run
-                # appears to win consistently.
-                while True:
-                    if self.lt(b.list[b.base], a.list[a.base]):
                         self.list[dest] = b.popleft()
                         dest += 1
                         if b.len == 0:
                             return
-                        bcount += 1
-                        acount = 0
-                        if bcount >= min_gallop:
-                            break
-                    else:
+
+                        bcount = self.gallop(a.list[a.base], b, hint=0,
+                                             rightmost=False)
+                        for p in xrange(b.base, b.base + bcount):
+                            self.list[dest] = b.list[p]
+                            dest += 1
+                        b.advance(bcount)
+                        if b.len == 0:
+                            return
+
                         self.list[dest] = a.popleft()
                         dest += 1
                         if a.len == 1:
                             return
-                        acount += 1
-                        bcount = 0
-                        if acount >= min_gallop:
+
+                        if acount < self.MIN_GALLOP and bcount < 
self.MIN_GALLOP:
                             break
 
-                # One run is winning so consistently that galloping may
-                # be a huge win.  So try that, and continue galloping until
-                # (if ever) neither run appears to be winning consistently
-                # anymore.
-                min_gallop += 1
+                    min_gallop += 1  # penalize it for leaving galloping mode
+                    self.min_gallop = min_gallop
+
+            finally:
+                # The last element of a belongs at the end of the merge, so we 
copy
+                # the remaining elements of b before the remaining elements of 
a.
+                assert a.len >= 0 and b.len >= 0
+                for p in xrange(b.base, b.base + b.len):
+                    self.list[dest] = b.list[p]
+                    dest += 1
+                for p in xrange(a.base, a.base + a.len):
+                    self.list[dest] = a.list[p]
+                    dest += 1
+
+        # Same as merge_lo(), but should have a.len >= b.len.
+
+        def merge_hi(self, a, b):
+            assert a.len > 0 and b.len > 0 and a.base + a.len == b.base
+            min_gallop = self.min_gallop
+            dest = b.base + b.len
+            b = b.copyitems()
+
+            # Invariant: elements in "b" are waiting to be reinserted into the 
list
+            # before "dest".  They should be merged with the elements of "a".
+            # a.base + a.len == dest - b.len.
+            # We use a finally block to ensure that the elements remaining in
+            # the copy "b" are reinserted back into self.list in all cases.
+            try:
+                dest -= 1
+                self.list[dest] = a.popright()
+                if a.len == 0 or b.len == 1:
+                    return
 
                 while True:
-                    min_gallop -= min_gallop > 1
+                    acount = 0   # number of times A won in a row
+                    bcount = 0   # number of times B won in a row
+
+                    # Do the straightforward thing until (if ever) one run
+                    # appears to win consistently.
+                    while True:
+                        nexta = a.list[a.base + a.len - 1]
+                        nextb = b.list[b.base + b.len - 1]
+                        if self.lt(nextb, nexta):
+                            dest -= 1
+                            self.list[dest] = nexta
+                            a.len -= 1
+                            if a.len == 0:
+                                return
+                            acount += 1
+                            bcount = 0
+                            if acount >= min_gallop:
+                                break
+                        else:
+                            dest -= 1
+                            self.list[dest] = nextb
+                            b.len -= 1
+                            if b.len == 1:
+                                return
+                            bcount += 1
+                            acount = 0
+                            if bcount >= min_gallop:
+                                break
+
+                    # One run is winning so consistently that galloping may
+                    # be a huge win.  So try that, and continue galloping until
+                    # (if ever) neither run appears to be winning consistently
+                    # anymore.
+                    min_gallop += 1
+
+                    while True:
+                        min_gallop -= min_gallop > 1
+                        self.min_gallop = min_gallop
+
+                        nextb = b.list[b.base + b.len - 1]
+                        k = self.gallop(nextb, a, hint=a.len-1, rightmost=True)
+                        acount = a.len - k
+                        for p in xrange(a.base + a.len - 1, a.base + k - 1, 
-1):
+                            dest -= 1
+                            self.list[dest] = a.list[p]
+                        a.len -= acount
+                        if a.len == 0:
+                            return
+
+                        dest -= 1
+                        self.list[dest] = b.popright()
+                        if b.len == 1:
+                            return
+
+                        nexta = a.list[a.base + a.len - 1]
+                        k = self.gallop(nexta, b, hint=b.len-1, 
rightmost=False)
+                        bcount = b.len - k
+                        for p in xrange(b.base + b.len - 1, b.base + k - 1, 
-1):
+                            dest -= 1
+                            self.list[dest] = b.list[p]
+                        b.len -= bcount
+                        # b.len==0 is impossible now if the comparison
+                        # function is consistent, but we can't assume
+                        # that it is.
+                        if b.len <= 1:
+                            return
+
+                        dest -= 1
+                        self.list[dest] = a.popright()
+                        if a.len == 0:
+                            return
+
+                        if acount < self.MIN_GALLOP and bcount < 
self.MIN_GALLOP:
+                            break
+
+                    min_gallop += 1  # penalize it for leaving galloping mode
                     self.min_gallop = min_gallop
 
-                    acount = self.gallop(b.list[b.base], a, hint=0,
-                                         rightmost=True)
-                    for p in xrange(a.base, a.base + acount):
-                        self.list[dest] = a.list[p]
-                        dest += 1
-                    a.advance(acount)
-                    # a.len==0 is impossible now if the comparison
-                    # function is consistent, but we can't assume
-                    # that it is.
-                    if a.len <= 1:
-                        return
+            finally:
+                # The last element of a belongs at the end of the merge, so we 
copy
+                # the remaining elements of a and then the remaining elements 
of b.
+                assert a.len >= 0 and b.len >= 0
+                for p in xrange(a.base + a.len - 1, a.base - 1, -1):
+                    dest -= 1
+                    self.list[dest] = a.list[p]
+                for p in xrange(b.base + b.len - 1, b.base - 1, -1):
+                    dest -= 1
+                    self.list[dest] = b.list[p]
 
-                    self.list[dest] = b.popleft()
-                    dest += 1
-                    if b.len == 0:
-                        return
+        # Merge the two runs at stack indices i and i+1.
 
-                    bcount = self.gallop(a.list[a.base], b, hint=0,
-                                         rightmost=False)
-                    for p in xrange(b.base, b.base + bcount):
-                        self.list[dest] = b.list[p]
-                        dest += 1
-                    b.advance(bcount)
-                    if b.len == 0:
-                        return
+        def merge_at(self, i):
+            a = self.pending[i]
+            b = self.pending[i+1]
+            assert a.len > 0 and b.len > 0
+            assert a.base + a.len == b.base
 
-                    self.list[dest] = a.popleft()
-                    dest += 1
-                    if a.len == 1:
-                        return
+            # Record the length of the combined runs and remove the run b
+            self.pending[i] = ListSlice(self.list, a.base, a.len + b.len)
+            del self.pending[i+1]
 
-                    if acount < self.MIN_GALLOP and bcount < self.MIN_GALLOP:
-                        break
-
-                min_gallop += 1  # penalize it for leaving galloping mode
-                self.min_gallop = min_gallop
-
-        finally:
-            # The last element of a belongs at the end of the merge, so we copy
-            # the remaining elements of b before the remaining elements of a.
-            assert a.len >= 0 and b.len >= 0
-            for p in xrange(b.base, b.base + b.len):
-                self.list[dest] = b.list[p]
-                dest += 1
-            for p in xrange(a.base, a.base + a.len):
-                self.list[dest] = a.list[p]
-                dest += 1
-
-    # Same as merge_lo(), but should have a.len >= b.len.
-
-    def merge_hi(self, a, b):
-        assert a.len > 0 and b.len > 0 and a.base + a.len == b.base
-        min_gallop = self.min_gallop
-        dest = b.base + b.len
-        b = b.copyitems()
-
-        # Invariant: elements in "b" are waiting to be reinserted into the list
-        # before "dest".  They should be merged with the elements of "a".
-        # a.base + a.len == dest - b.len.
-        # We use a finally block to ensure that the elements remaining in
-        # the copy "b" are reinserted back into self.list in all cases.
-        try:
-            dest -= 1
-            self.list[dest] = a.popright()
-            if a.len == 0 or b.len == 1:
+            # Where does b start in a?  Elements in a before that can be
+            # ignored (already in place).
+            k = self.gallop(b.list[b.base], a, hint=0, rightmost=True)
+            a.advance(k)
+            if a.len == 0:
                 return
 
-            while True:
-                acount = 0   # number of times A won in a row
-                bcount = 0   # number of times B won in a row
+            # Where does a end in b?  Elements in b after that can be
+            # ignored (already in place).
+            b.len = self.gallop(a.list[a.base+a.len-1], b, hint=b.len-1,
+                                rightmost=False)
+            if b.len == 0:
+                return
 
-                # Do the straightforward thing until (if ever) one run
-                # appears to win consistently.
-                while True:
-                    nexta = a.list[a.base + a.len - 1]
-                    nextb = b.list[b.base + b.len - 1]
-                    if self.lt(nextb, nexta):
-                        dest -= 1
-                        self.list[dest] = nexta
-                        a.len -= 1
-                        if a.len == 0:
-                            return
-                        acount += 1
-                        bcount = 0
-                        if acount >= min_gallop:
-                            break
+            # Merge what remains of the runs.  The direction is chosen to
+            # minimize the temporary storage needed.
+            if a.len <= b.len:
+                self.merge_lo(a, b)
+            else:
+                self.merge_hi(a, b)
+
+        # Examine the stack of runs waiting to be merged, merging adjacent runs
+        # until the stack invariants are re-established:
+        #
+        # 1. len[-3] > len[-2] + len[-1]
+        # 2. len[-2] > len[-1]
+        #
+        # See listsort.txt for more info.
+
+        def merge_collapse(self):
+            p = self.pending
+            while len(p) > 1:
+                if len(p) >= 3 and p[-3].len <= p[-2].len + p[-1].len:
+                    if p[-3].len < p[-1].len:
+                        self.merge_at(-3)
                     else:
-                        dest -= 1
-                        self.list[dest] = nextb
-                        b.len -= 1
-                        if b.len == 1:
-                            return
-                        bcount += 1
-                        acount = 0
-                        if bcount >= min_gallop:
-                            break
+                        self.merge_at(-2)
+                elif p[-2].len <= p[-1].len:
+                    self.merge_at(-2)
+                else:
+                    break
 
-                # One run is winning so consistently that galloping may
-                # be a huge win.  So try that, and continue galloping until
-                # (if ever) neither run appears to be winning consistently
-                # anymore.
-                min_gallop += 1
+        # Regardless of invariants, merge all runs on the stack until only one
+        # remains.  This is used at the end of the mergesort.
 
-                while True:
-                    min_gallop -= min_gallop > 1
-                    self.min_gallop = min_gallop
-
-                    nextb = b.list[b.base + b.len - 1]
-                    k = self.gallop(nextb, a, hint=a.len-1, rightmost=True)
-                    acount = a.len - k
-                    for p in xrange(a.base + a.len - 1, a.base + k - 1, -1):
-                        dest -= 1
-                        self.list[dest] = a.list[p]
-                    a.len -= acount
-                    if a.len == 0:
-                        return
-
-                    dest -= 1
-                    self.list[dest] = b.popright()
-                    if b.len == 1:
-                        return
-
-                    nexta = a.list[a.base + a.len - 1]
-                    k = self.gallop(nexta, b, hint=b.len-1, rightmost=False)
-                    bcount = b.len - k
-                    for p in xrange(b.base + b.len - 1, b.base + k - 1, -1):
-                        dest -= 1
-                        self.list[dest] = b.list[p]
-                    b.len -= bcount
-                    # b.len==0 is impossible now if the comparison
-                    # function is consistent, but we can't assume
-                    # that it is.
-                    if b.len <= 1:
-                        return
-
-                    dest -= 1
-                    self.list[dest] = a.popright()
-                    if a.len == 0:
-                        return
-
-                    if acount < self.MIN_GALLOP and bcount < self.MIN_GALLOP:
-                        break
-
-                min_gallop += 1  # penalize it for leaving galloping mode
-                self.min_gallop = min_gallop
-
-        finally:
-            # The last element of a belongs at the end of the merge, so we copy
-            # the remaining elements of a and then the remaining elements of b.
-            assert a.len >= 0 and b.len >= 0
-            for p in xrange(a.base + a.len - 1, a.base - 1, -1):
-                dest -= 1
-                self.list[dest] = a.list[p]
-            for p in xrange(b.base + b.len - 1, b.base - 1, -1):
-                dest -= 1
-                self.list[dest] = b.list[p]
-
-    # Merge the two runs at stack indices i and i+1.
-
-    def merge_at(self, i):
-        a = self.pending[i]
-        b = self.pending[i+1]
-        assert a.len > 0 and b.len > 0
-        assert a.base + a.len == b.base
-
-        # Record the length of the combined runs and remove the run b
-        self.pending[i] = ListSlice(self.list, a.base, a.len + b.len)
-        del self.pending[i+1]
-
-        # Where does b start in a?  Elements in a before that can be
-        # ignored (already in place).
-        k = self.gallop(b.list[b.base], a, hint=0, rightmost=True)
-        a.advance(k)
-        if a.len == 0:
-            return
-
-        # Where does a end in b?  Elements in b after that can be
-        # ignored (already in place).
-        b.len = self.gallop(a.list[a.base+a.len-1], b, hint=b.len-1,
-                            rightmost=False)
-        if b.len == 0:
-            return
-
-        # Merge what remains of the runs.  The direction is chosen to
-        # minimize the temporary storage needed.
-        if a.len <= b.len:
-            self.merge_lo(a, b)
-        else:
-            self.merge_hi(a, b)
-
-    # Examine the stack of runs waiting to be merged, merging adjacent runs
-    # until the stack invariants are re-established:
-    #
-    # 1. len[-3] > len[-2] + len[-1]
-    # 2. len[-2] > len[-1]
-    #
-    # See listsort.txt for more info.
-
-    def merge_collapse(self):
-        p = self.pending
-        while len(p) > 1:
-            if len(p) >= 3 and p[-3].len <= p[-2].len + p[-1].len:
-                if p[-3].len < p[-1].len:
+        def merge_force_collapse(self):
+            p = self.pending
+            while len(p) > 1:
+                if len(p) >= 3 and p[-3].len < p[-1].len:
                     self.merge_at(-3)
                 else:
                     self.merge_at(-2)
-            elif p[-2].len <= p[-1].len:
-                self.merge_at(-2)
-            else:
-                break
 
-    # Regardless of invariants, merge all runs on the stack until only one
-    # remains.  This is used at the end of the mergesort.
+        # Compute a good value for the minimum run length; natural runs shorter
+        # than this are boosted artificially via binary insertion.
+        #
+        # If n < 64, return n (it's too small to bother with fancy stuff).
+        # Else if n is an exact power of 2, return 32.
+        # Else return an int k, 32 <= k <= 64, such that n/k is close to, but
+        # strictly less than, an exact power of 2.
+        #
+        # See listsort.txt for more info.
 
-    def merge_force_collapse(self):
-        p = self.pending
-        while len(p) > 1:
-            if len(p) >= 3 and p[-3].len < p[-1].len:
-                self.merge_at(-3)
-            else:
-                self.merge_at(-2)
+        def merge_compute_minrun(self, n):
+            r = 0    # becomes 1 if any 1 bits are shifted off
+            while n >= 64:
+                r |= n & 1
+                n >>= 1
+            return n + r
 
-    # Compute a good value for the minimum run length; natural runs shorter
-    # than this are boosted artificially via binary insertion.
-    #
-    # If n < 64, return n (it's too small to bother with fancy stuff).
-    # Else if n is an exact power of 2, return 32.
-    # Else return an int k, 32 <= k <= 64, such that n/k is close to, but
-    # strictly less than, an exact power of 2.
-    #
-    # See listsort.txt for more info.
+        # ____________________________________________________________
+        # Entry point.
 
-    def merge_compute_minrun(self, n):
-        r = 0    # becomes 1 if any 1 bits are shifted off
-        while n >= 64:
-            r |= n & 1
-            n >>= 1
-        return n + r
+        def sort(self):
+            remaining = ListSlice(self.list, 0, self.listlength)
+            if remaining.len < 2:
+                return
 
-    # ____________________________________________________________
-    # Entry point.
+            # March over the array once, left to right, finding natural runs,
+            # and extending short natural runs to minrun elements.
+            self.merge_init()
+            minrun = self.merge_compute_minrun(remaining.len)
 
-    def sort(self):
-        remaining = ListSlice(self.list, 0, self.listlength)
-        if remaining.len < 2:
-            return
+            while remaining.len > 0:
+                # Identify next run.
+                run, descending = self.count_run(remaining)
+                if descending:
+                    run.reverse()
+                # If short, extend to min(minrun, nremaining).
+                if run.len < minrun:
+                    sorted = run.len
+                    run.len = min(minrun, remaining.len)
+                    self.binarysort(run, sorted)
+                # Advance remaining past this run.
+                remaining.advance(run.len)
+                # Push run onto pending-runs stack, and maybe merge.
+                self.pending.append(run)
+                self.merge_collapse()
 
-        # March over the array once, left to right, finding natural runs,
-        # and extending short natural runs to minrun elements.
-        self.merge_init()
-        minrun = self.merge_compute_minrun(remaining.len)
+            assert remaining.base == self.listlength
 
-        while remaining.len > 0:
-            # Identify next run.
-            run, descending = self.count_run(remaining)
-            if descending:
-                run.reverse()
-            # If short, extend to min(minrun, nremaining).
-            if run.len < minrun:
-                sorted = run.len
-                run.len = min(minrun, remaining.len)
-                self.binarysort(run, sorted)
-            # Advance remaining past this run.
-            remaining.advance(run.len)
-            # Push run onto pending-runs stack, and maybe merge.
-            self.pending.append(run)
-            self.merge_collapse()
+            self.merge_force_collapse()
+            assert len(self.pending) == 1
+            assert self.pending[0].base == 0
+            assert self.pending[0].len == self.listlength
 
-        assert remaining.base == self.listlength
 
-        self.merge_force_collapse()
-        assert len(self.pending) == 1
-        assert self.pending[0].base == 0
-        assert self.pending[0].len == self.listlength
+    class ListSlice:
+        "A sublist of a list."
 
+        def __init__(self, list, base, len):
+            self.list = list
+            self.base = base
+            self.len  = len
 
-class ListSlice:
-    "A sublist of a list."
+        def copyitems(self):
+            "Make a copy of the slice of the original list."
+            start = self.base
+            stop  = self.base + self.len
+            assert 0 <= start <= stop     # annotator hint
+            return ListSlice(self.list[start:stop], 0, self.len)
 
-    def __init__(self, list, base, len):
-        self.list = list
-        self.base = base
-        self.len  = len
+        def advance(self, n):
+            self.base += n
+            self.len -= n
 
-    def copyitems(self):
-        "Make a copy of the slice of the original list."
-        start = self.base
-        stop  = self.base + self.len
-        assert 0 <= start <= stop     # annotator hint
-        return ListSlice(self.list[start:stop], 0, self.len)
+        def popleft(self):
+            result = self.list[self.base]
+            self.base += 1
+            self.len -= 1
+            return result
 
-    def advance(self, n):
-        self.base += n
-        self.len -= n
+        def popright(self):
+            self.len -= 1
+            return self.list[self.base + self.len]
 
-    def popleft(self):
-        result = self.list[self.base]
-        self.base += 1
-        self.len -= 1
-        return result
+        def reverse(self):
+            "Reverse the slice in-place."
+            list = self.list
+            lo = self.base
+            hi = lo + self.len - 1
+            while lo < hi:
+                list[lo], list[hi] = list[hi], list[lo]
+                lo += 1
+                hi -= 1
+    return TimSort
 
-    def popright(self):
-        self.len -= 1
-        return self.list[self.base + self.len]
-
-    def reverse(self):
-        "Reverse the slice in-place."
-        list = self.list
-        lo = self.base
-        hi = lo + self.len - 1
-        while lo < hi:
-            list[lo], list[hi] = list[hi], list[lo]
-            lo += 1
-            hi -= 1
+TimSort = make_timsort_class() #backward compatible interface
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
http://mail.python.org/mailman/listinfo/pypy-commit

[pypy-commit] pypy default: prepare listobject and timsort for liststrategies that will be merged later

Reply via email to