Author: Richard Plangger <planri...@gmail.com>
Branch: vecopt-merge
Changeset: r79885:bb3eebb00aa2
Date: 2015-09-28 21:54 +0200
http://bitbucket.org/pypy/pypy/changeset/bb3eebb00aa2/
Log: wunderbar! all but one test_zjit test passing diff --git a/pypy/module/micronumpy/test/test_zjit.py b/pypy/module/micronumpy/test/test_zjit.py --- a/pypy/module/micronumpy/test/test_zjit.py +++ b/pypy/module/micronumpy/test/test_zjit.py @@ -18,7 +18,7 @@ from rpython.jit.metainterp import pyjitpl return pyjitpl._warmrunnerdesc.metainterp_sd.profiler -class TestNumpyJit(Jit386Mixin): +class TestNumpyJit(LLJitMixin): enable_opts = "intbounds:rewrite:virtualize:string:earlyforce:pure:heap:unroll" graph = None interp = None @@ -99,11 +99,6 @@ backendopt=True, graph_and_interp_only=True, ProfilerClass=Profiler, - translate_support_code=True, - translationoptions={'gc':'minimark', - 'gcrootfinder': 'asmgcc', - 'gcremovetypeptr': False - }, vec=True) self.__class__.interp = interp self.__class__.graph = graph @@ -120,8 +115,6 @@ self.compile_graph() profiler = get_profiler() profiler.start() - from rpython.jit.metainterp import pyjitpl - pyjitpl._warmrunnerdesc.jitcounter = counter.DeterministicJitCounter() reset_jit() i = self.code_mapping[name] retval = self.interp.eval_graph(self.graph, [i]) @@ -165,7 +158,7 @@ def test_float32_add(self): result = self.run("float32_add") self.assert_float_equal(result, 15.0 + 15.0) - self.check_vectorized(1, 1) + self.check_vectorized(2, 2) def define_float_add(): return """ @@ -198,7 +191,7 @@ def test_float32_add_const(self): result = self.run("float32_add_const") self.assert_float_equal(result, 29.0 + 77.345) - self.check_vectorized(1, 1) + self.check_vectorized(2, 2) def define_float_add_const(): return """ @@ -240,7 +233,7 @@ def test_int_expand(self): result = self.run("int_expand") assert int(result) == 7+16+8+16 - self.check_vectorized(1, 1) + self.check_vectorized(2, 2) def define_int32_expand(): return """ @@ -255,7 +248,7 @@ def test_int32_expand(self): result = self.run("int32_expand") assert int(result) == 7+16+8+16 - self.check_vectorized(2, 2) + self.check_vectorized(2, 1) def define_int16_expand(): return """ @@ 
-271,7 +264,7 @@ i = 8 assert int(result) == i*16 + sum(range(7,7+i)) # currently is is not possible to accum for types with < 8 bytes - self.check_vectorized(3, 1) + self.check_vectorized(3, 0) def define_int8_expand(): return """ @@ -289,7 +282,7 @@ # neither does sum # a + c should work, but it is given as a parameter # thus the accum must handle this! - self.check_vectorized(3, 1) + self.check_vectorized(3, 0) def define_int32_add_const(): return """ @@ -306,7 +299,7 @@ def test_int32_add_const(self): result = self.run("int32_add_const") assert int(result) == 7+1+8+1+11+2+12+2 - self.check_vectorized(1, 1) + self.check_vectorized(2, 2) def define_float_mul_array(): return """ @@ -338,7 +331,7 @@ def test_int32_mul_array(self): result = self.run("int32_mul_array") assert int(result) == 7*7+8*8+11*11+12*12 - self.check_vectorized(1, 1) + self.check_vectorized(2, 2) def define_float32_mul_array(): return """ @@ -366,7 +359,7 @@ def test_conversion(self): result = self.run("conversion") assert result == sum(range(30)) + sum(range(30)) - self.check_vectorized(2, 2) # only sum and astype(int) succeed + self.check_vectorized(4, 2) # only sum and astype(int) succeed def define_sum(): return """ @@ -396,7 +389,7 @@ def test_sum_int(self): result = self.run("sum_int") assert result == sum(range(65)) - self.check_vectorized(1, 1) + self.check_vectorized(2, 2) def define_sum_multi(): return """ @@ -420,7 +413,9 @@ def test_sum_float_to_int16(self): result = self.run("sum_float_to_int16") assert result == sum(range(30)) - self.check_vectorized(1, 0) + # one can argue that this is not desired, + # but unpacking exactly hits savings = 0 + self.check_vectorized(1, 1) def define_sum_float_to_int32(): return """ a = |30| @@ -504,7 +499,7 @@ retval = self.interp.eval_graph(self.graph, [i]) # check that we got only one loop assert len(get_stats().loops) == 1 - self.check_vectorized(2, 1) + self.check_vectorized(3, 1) def define_prod(): return """ @@ -823,7 +818,7 @@ result = 
self.run("dot") assert result == 184 self.check_trace_count(4) - self.check_vectorized(3,1) + self.check_vectorized(1,1) def define_argsort(): return """ @@ -923,7 +918,7 @@ def test_dot_matrix(self): result = self.run("dot_matrix") assert int(result) == 86 - self.check_vectorized(2, 1) + self.check_vectorized(1, 1) # NOT WORKING diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py b/rpython/jit/metainterp/optimizeopt/schedule.py --- a/rpython/jit/metainterp/optimizeopt/schedule.py +++ b/rpython/jit/metainterp/optimizeopt/schedule.py @@ -9,6 +9,7 @@ from rpython.rlib.objectmodel import we_are_translated from rpython.jit.metainterp.jitexc import NotAProfitableLoop from rpython.rlib.objectmodel import specialize, always_inline +from rpython.jit.metainterp.jitexc import NotAVectorizeableLoop, NotAProfitableLoop class SchedulerState(object): @@ -206,6 +207,25 @@ return self.count return count +class OpRestrict(object): + def __init__(self, argument_restris): + self.argument_restrictions = argument_restris + + def check_operation(self, state, pack, op): + pass + +class OpMatchSizeTypeFirst(OpRestrict): + def check_operation(self, state, pack, op): + arg0 = op.getarg(0) + bytesize = arg0.bytesize + datatype = arg0.datatype + + for arg in op.getarglist(): + if arg.bytesize != bytesize: + raise NotAVectorizeableLoop() + if arg.datatype != datatype: + raise NotAVectorizeableLoop() + class trans(object): TR_ANY = TypeRestrict() @@ -215,43 +235,46 @@ TR_DOUBLE_2 = TypeRestrict(FLOAT, 8, 2) TR_INT32_2 = TypeRestrict(INT, 4, 2) + OR_MSTF_I = OpMatchSizeTypeFirst([TR_ANY_INTEGER, TR_ANY_INTEGER]) + OR_MSTF_F = OpMatchSizeTypeFirst([TR_ANY_FLOAT, TR_ANY_FLOAT]) + # note that the following definition is x86 arch specific MAPPING = { - rop.VEC_INT_ADD: [TR_ANY_INTEGER, TR_ANY_INTEGER], - rop.VEC_INT_SUB: [TR_ANY_INTEGER, TR_ANY_INTEGER], - rop.VEC_INT_MUL: [TR_ANY_INTEGER, TR_ANY_INTEGER], - rop.VEC_INT_AND: [TR_ANY_INTEGER, TR_ANY_INTEGER], - rop.VEC_INT_OR: 
[TR_ANY_INTEGER, TR_ANY_INTEGER], - rop.VEC_INT_XOR: [TR_ANY_INTEGER, TR_ANY_INTEGER], - rop.VEC_INT_EQ: [TR_ANY_INTEGER, TR_ANY_INTEGER], - rop.VEC_INT_NE: [TR_ANY_INTEGER, TR_ANY_INTEGER], + rop.VEC_INT_ADD: OR_MSTF_I, + rop.VEC_INT_SUB: OR_MSTF_I, + rop.VEC_INT_MUL: OR_MSTF_I, + rop.VEC_INT_AND: OR_MSTF_I, + rop.VEC_INT_OR: OR_MSTF_I, + rop.VEC_INT_XOR: OR_MSTF_I, + rop.VEC_INT_EQ: OR_MSTF_I, + rop.VEC_INT_NE: OR_MSTF_I, - rop.VEC_FLOAT_ADD: [TR_ANY_FLOAT, TR_ANY_FLOAT], - rop.VEC_FLOAT_SUB: [TR_ANY_FLOAT, TR_ANY_FLOAT], - rop.VEC_FLOAT_MUL: [TR_ANY_FLOAT, TR_ANY_FLOAT], - rop.VEC_FLOAT_TRUEDIV: [TR_ANY_FLOAT, TR_ANY_FLOAT], - rop.VEC_FLOAT_ABS: [TR_ANY_FLOAT], - rop.VEC_FLOAT_NEG: [TR_ANY_FLOAT], + rop.VEC_FLOAT_ADD: OR_MSTF_F, + rop.VEC_FLOAT_SUB: OR_MSTF_F, + rop.VEC_FLOAT_MUL: OR_MSTF_F, + rop.VEC_FLOAT_TRUEDIV: OR_MSTF_F, + rop.VEC_FLOAT_ABS: OpRestrict([TR_ANY_FLOAT]), + rop.VEC_FLOAT_NEG: OpRestrict([TR_ANY_FLOAT]), - rop.VEC_RAW_STORE: [None, None, TR_ANY], - rop.VEC_SETARRAYITEM_RAW: [None, None, TR_ANY], - rop.VEC_SETARRAYITEM_GC: [None, None, TR_ANY], + rop.VEC_RAW_STORE: OpRestrict([None, None, TR_ANY]), + rop.VEC_SETARRAYITEM_RAW: OpRestrict([None, None, TR_ANY]), + rop.VEC_SETARRAYITEM_GC: OpRestrict([None, None, TR_ANY]), - rop.GUARD_TRUE: [TR_ANY_INTEGER], - rop.GUARD_FALSE: [TR_ANY_INTEGER], + rop.GUARD_TRUE: OpRestrict([TR_ANY_INTEGER]), + rop.GUARD_FALSE: OpRestrict([TR_ANY_INTEGER]), ## irregular - rop.VEC_INT_SIGNEXT: [TR_ANY_INTEGER], + rop.VEC_INT_SIGNEXT: OpRestrict([TR_ANY_INTEGER]), - rop.VEC_CAST_FLOAT_TO_SINGLEFLOAT: [TR_DOUBLE_2], + rop.VEC_CAST_FLOAT_TO_SINGLEFLOAT: OpRestrict([TR_DOUBLE_2]), # weird but the trace will store single floats in int boxes - rop.VEC_CAST_SINGLEFLOAT_TO_FLOAT: [TR_INT32_2], - rop.VEC_CAST_FLOAT_TO_INT: [TR_DOUBLE_2], - rop.VEC_CAST_INT_TO_FLOAT: [TR_INT32_2], + rop.VEC_CAST_SINGLEFLOAT_TO_FLOAT: OpRestrict([TR_INT32_2]), + rop.VEC_CAST_FLOAT_TO_INT: OpRestrict([TR_DOUBLE_2]), + rop.VEC_CAST_INT_TO_FLOAT: 
OpRestrict([TR_INT32_2]), - rop.VEC_FLOAT_EQ: [TR_ANY_FLOAT,TR_ANY_FLOAT], - rop.VEC_FLOAT_NE: [TR_ANY_FLOAT,TR_ANY_FLOAT], - rop.VEC_INT_IS_TRUE: [TR_ANY_INTEGER,TR_ANY_INTEGER], + rop.VEC_FLOAT_EQ: OpRestrict([TR_ANY_FLOAT,TR_ANY_FLOAT]), + rop.VEC_FLOAT_NE: OpRestrict([TR_ANY_FLOAT,TR_ANY_FLOAT]), + rop.VEC_INT_IS_TRUE: OpRestrict([TR_ANY_INTEGER,TR_ANY_INTEGER]), } def turn_into_vector(state, pack): @@ -259,6 +282,9 @@ check_if_pack_supported(state, pack) state.costmodel.record_pack_savings(pack, pack.numops()) left = pack.leftmost() + oprestrict = trans.MAPPING.get(pack.leftmost().vector, None) + if oprestrict is not None: + oprestrict.check_operation(state, pack, left) args = left.getarglist_copy() prepare_arguments(state, pack, args) vecop = VecOperation(left.vector, args, left, @@ -287,9 +313,10 @@ # a) expand vars/consts before the label and add as argument # b) expand vars created in the loop body # - restrictions = trans.MAPPING.get(pack.leftmost().vector, []) - if not restrictions: + oprestrict = trans.MAPPING.get(pack.leftmost().vector, None) + if not oprestrict: return + restrictions = oprestrict.argument_restrictions for i,arg in enumerate(args): if i >= len(restrictions) or restrictions[i] is None: # ignore this argument diff --git a/rpython/jit/metainterp/optimizeopt/test/test_vecopt.py b/rpython/jit/metainterp/optimizeopt/test/test_vecopt.py --- a/rpython/jit/metainterp/optimizeopt/test/test_vecopt.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_vecopt.py @@ -1234,6 +1234,23 @@ vopt = self.vectorize(trace) self.assert_equal(trace, trace_opt) + def test_sum_int16_prevent(self): + trace = self.parse_loop(""" + [i0, p1, i2, p3, i4, i5, i6] + i7 = raw_load_i(i5, i4, descr=int16arraydescr) + i8 = int_add(i0, i7) + i10 = int_add(i2, 1) + i12 = int_add(i4, 2) + i13 = int_ge(i10, i6) + guard_false(i13, descr=<rpython.jit.metainterp.compile.ResumeGuardFalseDescr object at 0x7fe5a1848150>) [p3, i10, i8, i12, None, p1, None, None] + jump(i8, p1, i10, 
p3, i12, i5, i6) + """) + try: + vopt = self.vectorize(trace) + py.test.fail() + except NotAVectorizeableLoop: + pass + def test_axis_sum(self): # TODO trace = """ _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit