Author: Richard Plangger <[email protected]>
Branch: vecopt
Changeset: r78405:03f3796197b8
Date: 2015-07-02 10:48 +0200
http://bitbucket.org/pypy/pypy/changeset/03f3796197b8/
Log: The product assembler now uses SHUFPD instead of SHUFPS and moves two
floats instead of one to the accumulator, resolving test issues with
the recent changes.
diff --git a/pypy/module/micronumpy/test/test_zjit.py b/pypy/module/micronumpy/test/test_zjit.py
--- a/pypy/module/micronumpy/test/test_zjit.py
+++ b/pypy/module/micronumpy/test/test_zjit.py
@@ -515,19 +515,29 @@
def define_prod():
return """
- a = |30|
+ a = [1,2,3,4,1,2,3,4]
+ prod(a)
+ """
+
+ def define_prod_zero():
+ return """
+ a = [1,2,3,4,1,2,3,0]
prod(a)
"""
def test_prod(self):
result = self.run("prod")
- expected = 1
- for i in range(30):
- expected *= i * 2
- assert result == expected
+ assert int(result) == 576
self.check_trace_count(1)
self.check_vectorized(1, 1)
+ def test_prod_zero(self):
+ result = self.run("prod_zero")
+ assert int(result) == 0
+ self.check_trace_count(1)
+ self.check_vectorized(1, 1)
+
+
def define_max():
return """
a = |30|
diff --git a/pypy/module/pypyjit/test_pypy_c/test_micronumpy.py b/pypy/module/pypyjit/test_pypy_c/test_micronumpy.py
--- a/pypy/module/pypyjit/test_pypy_c/test_micronumpy.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_micronumpy.py
@@ -42,7 +42,8 @@
import _numpypy.multiarray as np
a = np.array([{a}]*{count}, dtype='{adtype}')
b = np.array([{b}]*{count}, dtype='{bdtype}')
- c = a {op} b
+ for i in range(20):
+ c = a {op} b
return c.sum()
""".format(op=op, adtype=adtype, bdtype=bdtype, count=count, a=a, b=b)
exec py.code.Source(source).compile()
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -2560,8 +2560,8 @@
scratchloc = X86_64_XMM_SCRATCH_REG
self.mov(accumloc, scratchloc)
# swap the two elements
- self.mc.SHUFPS_xxi(scratchloc.value, scratchloc.value, 0x01)
- self.mc.MULPD(accumloc, scratchloc)
+ self.mc.SHUFPD_xxi(scratchloc.value, scratchloc.value, 0x01)
+ self.mc.MULSD(accumloc, scratchloc)
if accumloc is not targetloc:
self.mov(accumloc, targetloc)
@@ -2756,7 +2756,8 @@
srcloc, sizeloc = arglocs
size = sizeloc.value
if isinstance(srcloc, ConstFloatLoc):
- self.mov(srcloc, resloc)
+ # they are aligned!
+ self.mc.MOVAPD(resloc, srcloc)
elif size == 4:
# the register allocator forces src to be the same as resloc
# r = (s[0], s[0], r[0], r[0])
diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py b/rpython/jit/metainterp/optimizeopt/schedule.py
--- a/rpython/jit/metainterp/optimizeopt/schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/schedule.py
@@ -162,10 +162,12 @@
@staticmethod
def by_descr(descr, vec_reg_size):
_t = INT
+ signed = descr.is_item_signed()
if descr.is_array_of_floats() or descr.concrete_type == FLOAT:
_t = FLOAT
+ signed = False
size = descr.get_item_size_in_bytes()
- pt = PackType(_t, size, descr.is_item_signed(), vec_reg_size // size)
+ pt = PackType(_t, size, signed, vec_reg_size // size)
return pt
def __init__(self, type, size, signed, count=-1):
@@ -214,13 +216,13 @@
return self.count
+PT_GENERIC = PackType(PackType.UNKNOWN_TYPE, -1, False)
PT_FLOAT_2 = PackType(FLOAT, 4, False, 2)
PT_DOUBLE_2 = PackType(FLOAT, 8, False, 2)
-PT_FLOAT_GENERIC = PackType(INT, -1, True)
+PT_FLOAT_GENERIC = PackType(INT, -1, False)
PT_INT64 = PackType(INT, 8, True)
PT_INT32_2 = PackType(INT, 4, True, 2)
PT_INT_GENERIC = PackType(INT, -1, True)
-PT_GENERIC = PackType(PackType.UNKNOWN_TYPE, -1, False)
INT_RES = PT_INT_GENERIC
FLOAT_RES = PT_FLOAT_GENERIC
@@ -239,8 +241,7 @@
def check_if_pack_supported(self, pack):
op0 = pack.operations[0].getoperation()
if self.input_type is None:
- # must be a load operation
- assert op0.is_raw_load()
+ # must be a load/guard op
return
insize = self.input_type.getsize()
if op0.casts_box():
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_costmodel.py b/rpython/jit/metainterp/optimizeopt/test/test_costmodel.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_costmodel.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_costmodel.py
@@ -136,7 +136,7 @@
savings = self.savings(loop1)
assert savings == 2
- @py.test.mark.parametrize("bytes,s", [(1,None),(2,None),(4,0),(8,-1)])
+ @py.test.mark.parametrize("bytes,s", [(1,None),(2,None),(4,0),(8,0)])
def test_sum_float_to_int(self, bytes, s):
loop1 = self.parse("""
f10 = raw_load(p0, i0, descr=double)
@@ -189,8 +189,11 @@
f106 = cast_int_to_float(i110)
f107 = cast_int_to_float(i111)
""")
- savings = self.savings(loop1)
- assert savings <= -2
+ try:
+ self.savings(loop1)
+ py.test.fail("must not profitable!")
+ except NotAProfitableLoop:
+ pass
class Test(CostModelBaseTest, LLtypeMixin):
pass
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_schedule.py b/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
@@ -15,9 +15,9 @@
from rpython.jit.tool.oparser import parse as opparse
from rpython.jit.tool.oparser_model import get_model
-F64 = PackType('f',8,True,2)
-F32 = PackType('f',4,True,4)
-F32_2 = PackType('f',4,True,2)
+F64 = PackType('f',8,False,2)
+F32 = PackType('f',4,False,4)
+F32_2 = PackType('f',4,False,2)
I64 = PackType('i',8,True,2)
I32 = PackType('i',4,True,4)
I32_2 = PackType('i',4,True,2)
@@ -180,7 +180,7 @@
f10 = float_add(f0, 73.0)
f11 = float_add(f1, 73.0)
""")
- pack1 = self.pack(loop1, 0, 2, I64, I64)
+ pack1 = self.pack(loop1, 0, 2, F64, F64)
loop2 = self.schedule(loop1, [pack1], prepend_invariant=True)
loop3 = self.parse("""
v10[f64|2] = vec_box(2)
@@ -275,10 +275,10 @@
raw_store(p1, i7, i24, descr=short)
raw_store(p1, i8, i25, descr=short)
""")
- pack1 = self.pack(loop1, 0, 8, None, I64)
- pack2 = self.pack(loop1, 8, 16, I64, I32_2)
+ pack1 = self.pack(loop1, 0, 8, None, F64)
+ pack2 = self.pack(loop1, 8, 16, F64, I32_2)
I16_2 = PackType('i',2,True,2)
- pack3 = self.pack(loop1, 16, 24, I32, I16_2)
+ pack3 = self.pack(loop1, 16, 24, I32_2, I16_2)
pack4 = self.pack(loop1, 24, 32, I16, None)
def void(b,c):
pass
@@ -323,17 +323,17 @@
raw_store(p1, i3, i12, descr=float)
raw_store(p1, i4, i13, descr=float)
""")
- pack1 = self.pack(loop1, 0, 4, None, I64)
- pack2 = self.pack(loop1, 4, 8, I64, I32_2)
+ pack1 = self.pack(loop1, 0, 4, None, F64)
+ pack2 = self.pack(loop1, 4, 8, F64, I32_2)
pack3 = self.pack(loop1, 8, 12, I32, None)
loop2 = self.schedule(loop1, [pack1,pack2,pack3])
loop3 = self.parse("""
v44[f64|2] = vec_raw_load(p0, i1, 2, descr=double)
v45[f64|2] = vec_raw_load(p0, i3, 2, descr=double)
- v46[f32|2] = vec_cast_float_to_singlefloat(v44[f64|2])
- v47[f32|2] = vec_cast_float_to_singlefloat(v45[f64|2])
- v41[f32|4] = vec_float_pack(v46[f32|2], v47[f32|2], 2, 2)
- vec_raw_store(p1, i1, v41[f32|4], descr=float)
+ v46[i32|2] = vec_cast_float_to_singlefloat(v44[f64|2])
+ v47[i32|2] = vec_cast_float_to_singlefloat(v45[f64|2])
+ v41[i32|4] = vec_int_pack(v46[i32|2], v47[i32|2], 2, 2)
+ vec_raw_store(p1, i1, v41[i32|4], descr=float)
""", False)
self.assert_equal(loop2, loop3)
@@ -350,7 +350,7 @@
""")
pack1 = self.pack(loop1, 0, 2, None, I64)
pack2 = self.pack(loop1, 2, 4, I64, I64)
- pack3 = self.pack(loop1, 4, 6, None, I64)
+ pack3 = self.pack(loop1, 4, 6, I64, None)
loop2 = self.schedule(loop1, [pack1,pack2,pack3],
prepend_invariant=True)
loop3 = self.parse("""
v9[i64|2] = vec_int_expand(255)
@@ -372,10 +372,10 @@
pack2 = self.pack(loop1, 2, 4, I32_2, None)
loop2 = self.schedule(loop1, [pack1,pack2], prepend_invariant=True)
loop3 = self.parse("""
- v1[ui32|2] = vec_raw_load(p0, i1, 2, descr=float)
- i10 = vec_int_unpack(v1[ui32|2], 0, 1)
+ v1[i32|2] = vec_raw_load(p0, i1, 2, descr=float)
+ i10 = vec_int_unpack(v1[i32|2], 0, 1)
raw_store(p0, i3, i10, descr=float)
- i11 = vec_int_unpack(v1[ui32|2], 1, 1)
+ i11 = vec_int_unpack(v1[i32|2], 1, 1)
raw_store(p0, i4, i11, descr=float)
""", False)
# unfortunate ui32 is the type for float32... the unsigned u is for
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
@@ -44,6 +44,9 @@
loop.operations = pre + loop.operations
if loop.operations[-1].getopnum() == rop.JUMP:
loop.operations[-1].setdescr(token)
+ for op in loop.operations:
+ if op.getopnum() == rop.GUARD_EARLY_EXIT and op.getdescr() is None:
+ op.setdescr(compile.ResumeAtLoopHeaderDescr())
return loop
def assert_vectorize(self, loop, expected_loop, call_pure_results=None):
@@ -1356,6 +1359,7 @@
def test_abc(self):
+ py.test.skip()
trace="""
# int32 sum
label(p0, p19, i18, i24, i14, i8, i25,
descr=TargetToken(140320937897104))
@@ -1369,22 +1373,6 @@
i32 = int_ge(i30, i25)
guard_false(i32, descr=<Guard0x7f9f03ab17d0>) [p0, i29, i30, i31, p19,
None, None, None]
jump(p0, p19, i30, i31, i29, i8, i25,
descr=TargetToken(140320937897104))
-
- """
- trace ="""
- [i0, i1, i16, i17, i18, i5, p6, p7, p8, f19, p10, p11, p12, p13, p14,
p15, i20, i21]
-
guard_early_exit(descr=<rpython.jit.metainterp.compile.ResumeAtLoopHeaderDescr
object at 0x7f249eb2e510>) [i5, i18, i17, i16, i1, i0, p15, p14, p13, p12, p11,
p10, p8, p7, p6, f19]
- f22 = raw_load(i20, i18, descr=floatarraydescr)
-
guard_not_invalidated(descr=<rpython.jit.metainterp.compile.ResumeGuardNotInvalidated
object at 0x7f249eb2ec90>) [i5, i18, i17, i16, i1, i0, p15, p14, p13, p12,
p11, p10, p8, p7, p6, f22, f19]
- f23 = raw_load(i21, i17, descr=floatarraydescr)
- f24 = float_mul(f22, f23)
- f25 = float_add(f19, f24)
- i26 = int_add(i18, 8)
- i27 = int_add(i17, 8)
- i28 = int_lt(i16, i5)
- guard_true(i28,
descr=<rpython.jit.metainterp.compile.ResumeGuardTrueDescr object at
0x7f249eb99290>) [i5, i26, i27, i16, i1, i0, p15, p14, p13, p12, p11, p10, p8,
p7, p6, f25, None]
- i31 = int_add(i16, 1)
- jump(i0, i1, i31, i27, i26, i5, p6, p7, p8, f25, p10, p11, p12, p13,
p14, p15, i20, i21)
"""
# schedule 885 -> ptype is non for raw_load?
opt = self.vectorize(self.parse_loop(trace))
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit