Author: stian
Branch: improve-rbigint
Changeset: r56344:f89eae2a4218
Date: 2012-07-04 01:34 +0200
http://bitbucket.org/pypy/pypy/changeset/f89eae2a4218/
Log: Add some _always_inline_ (for some reason inlining doesn't always
happen). This makes lshift 15% faster
diff --git a/pypy/rlib/rbigint.py b/pypy/rlib/rbigint.py
--- a/pypy/rlib/rbigint.py
+++ b/pypy/rlib/rbigint.py
@@ -89,7 +89,7 @@
return r_longlonglong(x)
else:
return r_longlong(x)
-
+_widen_digit._always_inline_ = True
def _store_digit(x):
"""if not we_are_translated():
assert is_valid_int(x), "store_digit() takes an int, got a %r" %
type(x)"""
@@ -102,6 +102,7 @@
else:
raise ValueError("SHIFT too large!")
_store_digit._annspecialcase_ = 'specialize:argtype(0)'
+_store_digit._always_inline_ = True
def _load_digit(x):
if SHIFT < LONG_BIT: # This would be the case for any SHIFT < LONG_BIT
@@ -109,6 +110,7 @@
else:
# x already is a type large enough, just not as fast.
return x
+_load_digit._always_inline_ = True
def _load_unsigned_digit(x):
if SHIFT < LONG_BIT: # This would be the case for any SHIFT < LONG_BIT
@@ -117,6 +119,7 @@
# This needs a performance test on 32bit
return rffi.cast(rffi.ULONGLONG, x)
#return r_ulonglong(x)
+_load_unsigned_digit._always_inline_ = True
NULLDIGIT = _store_digit(0)
ONEDIGIT = _store_digit(1)
@@ -151,25 +154,30 @@
def digit(self, x):
"""Return the x'th digit, as an int."""
return _load_digit(self._digits[x])
-
+ digit._always_inline_ = True
+
def widedigit(self, x):
"""Return the x'th digit, as a long long int if needed
to have enough room to contain two digits."""
return _widen_digit(_load_digit(self._digits[x]))
-
+ widedigit._always_inline_ = True
+
def udigit(self, x):
"""Return the x'th digit, as an unsigned int."""
return _load_unsigned_digit(self._digits[x])
-
+ udigit._always_inline_ = True
+
def setdigit(self, x, val):
val = _mask_digit(val)
assert val >= 0
self._digits[x] = _store_digit(val)
setdigit._annspecialcase_ = 'specialize:argtype(2)'
+ setdigit._always_inline_ = True
def numdigits(self):
return len(self._digits)
-
+ numdigits._always_inline_ = True
+
@staticmethod
@jit.elidable
def fromint(intval):
@@ -708,7 +716,8 @@
z._normalize()
return z
-
+ lshift._always_inline_ = True # It's so fast that it's always benefitial.
+
@jit.elidable
def lqshift(self, int_other):
" A quicker one with much less checks, int_other is valid and for the
most part constant."
@@ -727,6 +736,7 @@
z.setdigit(oldsize, accum)
z._normalize()
return z
+ lqshift._always_inline_ = True # It's so fast that it's always benefitial.
@jit.elidable
def rshift(self, int_other, dont_invert=False):
@@ -761,7 +771,8 @@
j += 1
z._normalize()
return z
-
+ rshift._always_inline_ = True # It's so fast that it's always benefitial.
+
@jit.elidable
def and_(self, other):
return _bitwise(self, '&', other)
@@ -1690,15 +1701,15 @@
def _divrem(a, b):
""" Long division with remainder, top-level routine """
- size_a = _load_unsigned_digit(a.numdigits())
- size_b = _load_unsigned_digit(b.numdigits())
+ size_a = a.numdigits()
+ size_b = b.numdigits()
if b.sign == 0:
raise ZeroDivisionError("long division or modulo by zero")
if (size_a < size_b or
(size_a == size_b and
- a.digit(size_a-1) < b.digit(size_b-1))):
+ a.digit(abs(size_a-1)) < b.digit(abs(size_b-1)))):
# |a| < |b|
return NULLRBIGINT, a# result is 0
if size_b == 1:
diff --git a/pypy/translator/goal/targetbigintbenchmark.py
b/pypy/translator/goal/targetbigintbenchmark.py
--- a/pypy/translator/goal/targetbigintbenchmark.py
+++ b/pypy/translator/goal/targetbigintbenchmark.py
@@ -12,37 +12,38 @@
A cutout with some benchmarks.
Pypy default:
- 2.777119
- 2.316023
- 2.418211
- 5.147583
- 5.139127
- 484.5688
- 334.611903
- 8.637287
- 12.211942
- 18.270045
- 2.512140
- 14.148920
- 18.576713
- 6.647562
-
+ 2.803071
+ 2.366586
+ 2.428205
+ 4.408400
+ 4.424533
+ 537.338
+ 268.3339
+ 8.548186
+ 12.197392
+ 17.629869
+ 2.360716
+ 14.315827
+ 17.963899
+ 6.604541
+ Sum: 901.7231250000001
+
Pypy with improvements:
- 2.822389 # Little slower, divmod
- 2.522946 # Little shower, rshift
- 4.600970 # Much slower, lshift
- 2.126048 # Twice as fast
- 4.276203 # Little faster
- 9.662745 # 50 times faster
- 1.621029 # 200 times faster
- 3.956685 # Twice as fast
- 5.752223 # Twice as fast
- 7.660295 # More than twice as fast
- 0.039137 # 50 times faster
- 4.437456 # 3 times faster
- 9.078680 # Twice as fast
- 4.995520 # 1/3 faster, add
-
+ 2.884540
+ 2.499774
+ 3.796117
+ 1.681326
+ 4.060521
+ 9.696996
+ 1.643792
+ 4.045248
+ 4.714733
+ 6.589811
+ 0.039319
+ 3.503355
+ 8.266362
+ 5.044856
+ Sum: 58.466750
A pure python form of those tests where also run
Improved pypy | Pypy | CPython 2.7.3
@@ -61,7 +62,8 @@
9.19830608368 17.0125601292 11.1488289833
5.40441417694 6.59027791023 3.63601899147
"""
-
+ sumTime = 0.0
+
t = time()
num = rbigint.pow(rbigint.fromint(100000000), rbigint.fromint(1024))
by = rbigint.pow(rbigint.fromint(2), rbigint.fromint(128))
@@ -69,7 +71,9 @@
rbigint.divmod(num, by)
- print time() - t
+ _time = time() - t
+ sumTime += _time
+ print _time
t = time()
num = rbigint.fromint(1000000000)
@@ -77,7 +81,9 @@
rbigint.rshift(num, 16)
- print time() - t
+ _time = time() - t
+ sumTime += _time
+ print _time
t = time()
num = rbigint.fromint(1000000000)
@@ -85,7 +91,9 @@
rbigint.lshift(num, 4)
- print time() - t
+ _time = time() - t
+ sumTime += _time
+ print _time
t = time()
num = rbigint.fromint(100000000)
@@ -94,7 +102,9 @@
rbigint.floordiv(num, V2)
- print time() - t
+ _time = time() - t
+ sumTime += _time
+ print _time
t = time()
num = rbigint.fromint(100000000)
@@ -103,7 +113,9 @@
rbigint.floordiv(num, V3)
- print time() - t
+ _time = time() - t
+ sumTime += _time
+ print _time
t = time()
num = rbigint.fromint(10000000)
@@ -111,7 +123,9 @@
rbigint.pow(V2, num)
- print time() - t
+ _time = time() - t
+ sumTime += _time
+ print _time
t = time()
num = rbigint.fromint(100000000)
@@ -119,7 +133,9 @@
rbigint.pow(rbigint.pow(V2, rbigint.fromint(n)), num)
- print time() - t
+ _time = time() - t
+ sumTime += _time
+ print _time
t = time()
num = rbigint.pow(rbigint.fromint(10000), rbigint.fromint(2 ** 8))
@@ -129,7 +145,9 @@
rbigint.pow(P10_4, num, V100)
- print time() - t
+ _time = time() - t
+ sumTime += _time
+ print _time
t = time()
i = rbigint.fromint(2**31)
@@ -137,7 +155,9 @@
for n in xrange(75000):
i = i.mul(i2)
- print time() - t
+ _time = time() - t
+ sumTime += _time
+ print _time
t = time()
@@ -145,7 +165,9 @@
rbigint.pow(rbigint.fromint(n), P10_4)
- print time() - t
+ _time = time() - t
+ sumTime += _time
+ print _time
t = time()
@@ -154,7 +176,9 @@
rbigint.pow(V1024, V1024)
- print time() - t
+ _time = time() - t
+ sumTime += _time
+ print _time
t = time()
@@ -164,7 +188,9 @@
v = v.mul(P62)
- print time() - t
+ _time = time() - t
+ sumTime += _time
+ print _time
t = time()
v2 = rbigint.fromint(2**8)
@@ -172,7 +198,9 @@
v2 = v2.mul(v2)
- print time() - t
+ _time = time() - t
+ sumTime += _time
+ print _time
t = time()
v3 = rbigint.fromint(2**62)
@@ -180,7 +208,11 @@
v3 = v3.add(v3)
- print time() - t
+ _time = time() - t
+ sumTime += _time
+ print _time
+
+ print "Sum: ", sumTime
return 0
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit