Author: Armin Rigo <ar...@tunes.org> Branch: Changeset: r81572:ef530201647c Date: 2016-01-05 11:58 +0100 http://bitbucket.org/pypy/pypy/changeset/ef530201647c/
Log: CPython has a special case for ``long("string", power-of-two-base)`` to avoid quadratic time. It is used by pickling, notably. diff --git a/pypy/objspace/std/test/test_longobject.py b/pypy/objspace/std/test/test_longobject.py --- a/pypy/objspace/std/test/test_longobject.py +++ b/pypy/objspace/std/test/test_longobject.py @@ -358,3 +358,10 @@ assert 3L.__coerce__(4L) == (3L, 4L) assert 3L.__coerce__(4) == (3, 4) assert 3L.__coerce__(object()) == NotImplemented + + def test_linear_long_base_16(self): + # never finishes if long(_, 16) is not linear-time + size = 100000 + n = "5" + "0" * size + expected = 5 << (size * 4) + assert long(n, 16) == expected diff --git a/rpython/rlib/rbigint.py b/rpython/rlib/rbigint.py --- a/rpython/rlib/rbigint.py +++ b/rpython/rlib/rbigint.py @@ -2794,8 +2794,10 @@ def parse_digit_string(parser): # helper for fromstr + base = parser.base + if (base & (base - 1)) == 0: + return parse_string_from_binary_base(parser) a = rbigint() - base = parser.base digitmax = BASE_MAX[base] tens, dig = 1, 0 while True: @@ -2811,3 +2813,50 @@ tens *= base a.sign *= parser.sign return a + +def parse_string_from_binary_base(parser): + # The point to this routine is that it takes time linear in the number of + # string characters. + base = parser.base + if base == 2: bits_per_char = 1 + elif base == 4: bits_per_char = 2 + elif base == 8: bits_per_char = 3 + elif base == 16: bits_per_char = 4 + elif base == 32: bits_per_char = 5 + else: + raise AssertionError + + # n <- total number of bits needed, while moving 'parser' to the end + n = 0 + while parser.next_digit() >= 0: + n += 1 + + # b <- number of Python digits needed, = ceiling(n/SHIFT). */ + try: + b = ovfcheck(n * bits_per_char) + b = ovfcheck(b + (SHIFT - 1)) + except OverflowError: + raise ParseStringError("long string too large to convert") + b = (b // SHIFT) or 1 + z = rbigint([NULLDIGIT] * b, sign=parser.sign) + + # Read string from right, and fill in long from left; i.e., + # from least to most significant in both. + accum = _widen_digit(0) + bits_in_accum = 0 + pdigit = 0 + for _ in range(n): + k = parser.prev_digit() + accum |= _widen_digit(k) << bits_in_accum + bits_in_accum += bits_per_char + if bits_in_accum >= SHIFT: + z.setdigit(pdigit, accum) + pdigit += 1 + assert pdigit <= b + accum >>= SHIFT + bits_in_accum -= SHIFT + + if bits_in_accum: + z.setdigit(pdigit, accum) + z._normalize() + return z diff --git a/rpython/rlib/rstring.py b/rpython/rlib/rstring.py --- a/rpython/rlib/rstring.py +++ b/rpython/rlib/rstring.py @@ -485,6 +485,24 @@ else: return -1 + def prev_digit(self): + # After exhausting all n digits in next_digit(), you can walk them + # again in reverse order by calling prev_digit() exactly n times + i = self.i - 1 + assert i >= 0 + self.i = i + c = self.s[i] + digit = ord(c) + if '0' <= c <= '9': + digit -= ord('0') + elif 'A' <= c <= 'Z': + digit = (digit - ord('A')) + 10 + elif 'a' <= c <= 'z': + digit = (digit - ord('a')) + 10 + else: + raise AssertionError + return digit + # -------------- public API --------------------------------- INIT_SIZE = 100 # XXX tweak diff --git a/rpython/rlib/test/test_rbigint.py b/rpython/rlib/test/test_rbigint.py --- a/rpython/rlib/test/test_rbigint.py +++ b/rpython/rlib/test/test_rbigint.py @@ -825,7 +825,19 @@ def __init__(self, base, sign, digits): self.base = base self.sign = sign - self.next_digit = iter(digits + [-1]).next + self.i = 0 + self._digits = digits + def next_digit(self): + i = self.i + if i == len(self._digits): + return -1 + self.i = i + 1 + return self._digits[i] + def prev_digit(self): + i = self.i - 1 + assert i >= 0 + self.i = i + return self._digits[i] x = parse_digit_string(Parser(10, 1, [6])) assert x.eq(rbigint.fromint(6)) x = parse_digit_string(Parser(10, 1, [6, 2, 3])) @@ -847,6 +859,16 @@ x = parse_digit_string(Parser(7, -1, [0, 0, 0])) assert x.tobool() is False + for base in [2, 4, 8, 16, 32]: + for inp in [[0], [1], [1, 0], [0, 1], [1, 0, 1], [1, 0, 0, 1], + [1, 0, 0, base-1, 0, 1], [base-1, 1, 0, 0, 0, 1, 0], + [base-1]]: + inp = inp * 97 + x = parse_digit_string(Parser(base, -1, inp)) + num = sum(inp[i] * (base ** (len(inp)-1-i)) + for i in range(len(inp))) + assert x.eq(rbigint.fromlong(-num)) + BASE = 2 ** SHIFT _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit