Author: Richard Plangger <r...@pasra.at> Branch: vecopt Changeset: r78405:03f3796197b8 Date: 2015-07-02 10:48 +0200 http://bitbucket.org/pypy/pypy/changeset/03f3796197b8/
Log: The product assembler now uses SHUFPD instead of SHUFPS and moves two floats instead of one to the accumulator, resolving test issues with the recent changes. diff --git a/pypy/module/micronumpy/test/test_zjit.py b/pypy/module/micronumpy/test/test_zjit.py --- a/pypy/module/micronumpy/test/test_zjit.py +++ b/pypy/module/micronumpy/test/test_zjit.py @@ -515,19 +515,29 @@ def define_prod(): return """ - a = |30| + a = [1,2,3,4,1,2,3,4] + prod(a) + """ + + def define_prod_zero(): + return """ + a = [1,2,3,4,1,2,3,0] prod(a) """ def test_prod(self): result = self.run("prod") - expected = 1 - for i in range(30): - expected *= i * 2 - assert result == expected + assert int(result) == 576 self.check_trace_count(1) self.check_vectorized(1, 1) + def test_prod_zero(self): + result = self.run("prod_zero") + assert int(result) == 0 + self.check_trace_count(1) + self.check_vectorized(1, 1) + + def define_max(): return """ a = |30| diff --git a/pypy/module/pypyjit/test_pypy_c/test_micronumpy.py b/pypy/module/pypyjit/test_pypy_c/test_micronumpy.py --- a/pypy/module/pypyjit/test_pypy_c/test_micronumpy.py +++ b/pypy/module/pypyjit/test_pypy_c/test_micronumpy.py @@ -42,7 +42,8 @@ import _numpypy.multiarray as np a = np.array([{a}]*{count}, dtype='{adtype}') b = np.array([{b}]*{count}, dtype='{bdtype}') - c = a {op} b + for i in range(20): + c = a {op} b return c.sum() """.format(op=op, adtype=adtype, bdtype=bdtype, count=count, a=a, b=b) exec py.code.Source(source).compile() diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py --- a/rpython/jit/backend/x86/assembler.py +++ b/rpython/jit/backend/x86/assembler.py @@ -2560,8 +2560,8 @@ scratchloc = X86_64_XMM_SCRATCH_REG self.mov(accumloc, scratchloc) # swap the two elements - self.mc.SHUFPS_xxi(scratchloc.value, scratchloc.value, 0x01) - self.mc.MULPD(accumloc, scratchloc) + self.mc.SHUFPD_xxi(scratchloc.value, scratchloc.value, 0x01) + self.mc.MULSD(accumloc, scratchloc) if accumloc is not targetloc: 
self.mov(accumloc, targetloc) @@ -2756,7 +2756,8 @@ srcloc, sizeloc = arglocs size = sizeloc.value if isinstance(srcloc, ConstFloatLoc): - self.mov(srcloc, resloc) + # they are aligned! + self.mc.MOVAPD(resloc, srcloc) elif size == 4: # the register allocator forces src to be the same as resloc # r = (s[0], s[0], r[0], r[0]) diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py b/rpython/jit/metainterp/optimizeopt/schedule.py --- a/rpython/jit/metainterp/optimizeopt/schedule.py +++ b/rpython/jit/metainterp/optimizeopt/schedule.py @@ -162,10 +162,12 @@ @staticmethod def by_descr(descr, vec_reg_size): _t = INT + signed = descr.is_item_signed() if descr.is_array_of_floats() or descr.concrete_type == FLOAT: _t = FLOAT + signed = False size = descr.get_item_size_in_bytes() - pt = PackType(_t, size, descr.is_item_signed(), vec_reg_size // size) + pt = PackType(_t, size, signed, vec_reg_size // size) return pt def __init__(self, type, size, signed, count=-1): @@ -214,13 +216,13 @@ return self.count +PT_GENERIC = PackType(PackType.UNKNOWN_TYPE, -1, False) PT_FLOAT_2 = PackType(FLOAT, 4, False, 2) PT_DOUBLE_2 = PackType(FLOAT, 8, False, 2) -PT_FLOAT_GENERIC = PackType(INT, -1, True) +PT_FLOAT_GENERIC = PackType(INT, -1, False) PT_INT64 = PackType(INT, 8, True) PT_INT32_2 = PackType(INT, 4, True, 2) PT_INT_GENERIC = PackType(INT, -1, True) -PT_GENERIC = PackType(PackType.UNKNOWN_TYPE, -1, False) INT_RES = PT_INT_GENERIC FLOAT_RES = PT_FLOAT_GENERIC @@ -239,8 +241,7 @@ def check_if_pack_supported(self, pack): op0 = pack.operations[0].getoperation() if self.input_type is None: - # must be a load operation - assert op0.is_raw_load() + # must be a load/guard op return insize = self.input_type.getsize() if op0.casts_box(): diff --git a/rpython/jit/metainterp/optimizeopt/test/test_costmodel.py b/rpython/jit/metainterp/optimizeopt/test/test_costmodel.py --- a/rpython/jit/metainterp/optimizeopt/test/test_costmodel.py +++ 
b/rpython/jit/metainterp/optimizeopt/test/test_costmodel.py @@ -136,7 +136,7 @@ savings = self.savings(loop1) assert savings == 2 - @py.test.mark.parametrize("bytes,s", [(1,None),(2,None),(4,0),(8,-1)]) + @py.test.mark.parametrize("bytes,s", [(1,None),(2,None),(4,0),(8,0)]) def test_sum_float_to_int(self, bytes, s): loop1 = self.parse(""" f10 = raw_load(p0, i0, descr=double) @@ -189,8 +189,11 @@ f106 = cast_int_to_float(i110) f107 = cast_int_to_float(i111) """) - savings = self.savings(loop1) - assert savings <= -2 + try: + self.savings(loop1) + py.test.fail("must not profitable!") + except NotAProfitableLoop: + pass class Test(CostModelBaseTest, LLtypeMixin): pass diff --git a/rpython/jit/metainterp/optimizeopt/test/test_schedule.py b/rpython/jit/metainterp/optimizeopt/test/test_schedule.py --- a/rpython/jit/metainterp/optimizeopt/test/test_schedule.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_schedule.py @@ -15,9 +15,9 @@ from rpython.jit.tool.oparser import parse as opparse from rpython.jit.tool.oparser_model import get_model -F64 = PackType('f',8,True,2) -F32 = PackType('f',4,True,4) -F32_2 = PackType('f',4,True,2) +F64 = PackType('f',8,False,2) +F32 = PackType('f',4,False,4) +F32_2 = PackType('f',4,False,2) I64 = PackType('i',8,True,2) I32 = PackType('i',4,True,4) I32_2 = PackType('i',4,True,2) @@ -180,7 +180,7 @@ f10 = float_add(f0, 73.0) f11 = float_add(f1, 73.0) """) - pack1 = self.pack(loop1, 0, 2, I64, I64) + pack1 = self.pack(loop1, 0, 2, F64, F64) loop2 = self.schedule(loop1, [pack1], prepend_invariant=True) loop3 = self.parse(""" v10[f64|2] = vec_box(2) @@ -275,10 +275,10 @@ raw_store(p1, i7, i24, descr=short) raw_store(p1, i8, i25, descr=short) """) - pack1 = self.pack(loop1, 0, 8, None, I64) - pack2 = self.pack(loop1, 8, 16, I64, I32_2) + pack1 = self.pack(loop1, 0, 8, None, F64) + pack2 = self.pack(loop1, 8, 16, F64, I32_2) I16_2 = PackType('i',2,True,2) - pack3 = self.pack(loop1, 16, 24, I32, I16_2) + pack3 = self.pack(loop1, 16, 24, 
I32_2, I16_2) pack4 = self.pack(loop1, 24, 32, I16, None) def void(b,c): pass @@ -323,17 +323,17 @@ raw_store(p1, i3, i12, descr=float) raw_store(p1, i4, i13, descr=float) """) - pack1 = self.pack(loop1, 0, 4, None, I64) - pack2 = self.pack(loop1, 4, 8, I64, I32_2) + pack1 = self.pack(loop1, 0, 4, None, F64) + pack2 = self.pack(loop1, 4, 8, F64, I32_2) pack3 = self.pack(loop1, 8, 12, I32, None) loop2 = self.schedule(loop1, [pack1,pack2,pack3]) loop3 = self.parse(""" v44[f64|2] = vec_raw_load(p0, i1, 2, descr=double) v45[f64|2] = vec_raw_load(p0, i3, 2, descr=double) - v46[f32|2] = vec_cast_float_to_singlefloat(v44[f64|2]) - v47[f32|2] = vec_cast_float_to_singlefloat(v45[f64|2]) - v41[f32|4] = vec_float_pack(v46[f32|2], v47[f32|2], 2, 2) - vec_raw_store(p1, i1, v41[f32|4], descr=float) + v46[i32|2] = vec_cast_float_to_singlefloat(v44[f64|2]) + v47[i32|2] = vec_cast_float_to_singlefloat(v45[f64|2]) + v41[i32|4] = vec_int_pack(v46[i32|2], v47[i32|2], 2, 2) + vec_raw_store(p1, i1, v41[i32|4], descr=float) """, False) self.assert_equal(loop2, loop3) @@ -350,7 +350,7 @@ """) pack1 = self.pack(loop1, 0, 2, None, I64) pack2 = self.pack(loop1, 2, 4, I64, I64) - pack3 = self.pack(loop1, 4, 6, None, I64) + pack3 = self.pack(loop1, 4, 6, I64, None) loop2 = self.schedule(loop1, [pack1,pack2,pack3], prepend_invariant=True) loop3 = self.parse(""" v9[i64|2] = vec_int_expand(255) @@ -372,10 +372,10 @@ pack2 = self.pack(loop1, 2, 4, I32_2, None) loop2 = self.schedule(loop1, [pack1,pack2], prepend_invariant=True) loop3 = self.parse(""" - v1[ui32|2] = vec_raw_load(p0, i1, 2, descr=float) - i10 = vec_int_unpack(v1[ui32|2], 0, 1) + v1[i32|2] = vec_raw_load(p0, i1, 2, descr=float) + i10 = vec_int_unpack(v1[i32|2], 0, 1) raw_store(p0, i3, i10, descr=float) - i11 = vec_int_unpack(v1[ui32|2], 1, 1) + i11 = vec_int_unpack(v1[i32|2], 1, 1) raw_store(p0, i4, i11, descr=float) """, False) # unfortunate ui32 is the type for float32... 
the unsigned u is for diff --git a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py --- a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py @@ -44,6 +44,9 @@ loop.operations = pre + loop.operations if loop.operations[-1].getopnum() == rop.JUMP: loop.operations[-1].setdescr(token) + for op in loop.operations: + if op.getopnum() == rop.GUARD_EARLY_EXIT and op.getdescr() is None: + op.setdescr(compile.ResumeAtLoopHeaderDescr()) return loop def assert_vectorize(self, loop, expected_loop, call_pure_results=None): @@ -1356,6 +1359,7 @@ def test_abc(self): + py.test.skip() trace=""" # int32 sum label(p0, p19, i18, i24, i14, i8, i25, descr=TargetToken(140320937897104)) @@ -1369,22 +1373,6 @@ i32 = int_ge(i30, i25) guard_false(i32, descr=<Guard0x7f9f03ab17d0>) [p0, i29, i30, i31, p19, None, None, None] jump(p0, p19, i30, i31, i29, i8, i25, descr=TargetToken(140320937897104)) - - """ - trace =""" - [i0, i1, i16, i17, i18, i5, p6, p7, p8, f19, p10, p11, p12, p13, p14, p15, i20, i21] - guard_early_exit(descr=<rpython.jit.metainterp.compile.ResumeAtLoopHeaderDescr object at 0x7f249eb2e510>) [i5, i18, i17, i16, i1, i0, p15, p14, p13, p12, p11, p10, p8, p7, p6, f19] - f22 = raw_load(i20, i18, descr=floatarraydescr) - guard_not_invalidated(descr=<rpython.jit.metainterp.compile.ResumeGuardNotInvalidated object at 0x7f249eb2ec90>) [i5, i18, i17, i16, i1, i0, p15, p14, p13, p12, p11, p10, p8, p7, p6, f22, f19] - f23 = raw_load(i21, i17, descr=floatarraydescr) - f24 = float_mul(f22, f23) - f25 = float_add(f19, f24) - i26 = int_add(i18, 8) - i27 = int_add(i17, 8) - i28 = int_lt(i16, i5) - guard_true(i28, descr=<rpython.jit.metainterp.compile.ResumeGuardTrueDescr object at 0x7f249eb99290>) [i5, i26, i27, i16, i1, i0, p15, p14, p13, p12, p11, p10, p8, p7, p6, f25, None] - i31 = int_add(i16, 1) - jump(i0, i1, i31, i27, i26, i5, p6, p7, p8, f25, p10, 
p11, p12, p13, p14, p15, i20, i21) """ # schedule 885 -> ptype is non for raw_load? opt = self.vectorize(self.parse_loop(trace)) _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit