Author: Richard Plangger <r...@pasra.at>
Branch: vecopt
Changeset: r78326:d1a942296dd8
Date: 2015-06-26 17:46 +0200
http://bitbucket.org/pypy/pypy/changeset/d1a942296dd8/
Log: rewritten scheduling tests to add type (could not be inferred easily), cost model passing again diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py b/rpython/jit/metainterp/optimizeopt/schedule.py --- a/rpython/jit/metainterp/optimizeopt/schedule.py +++ b/rpython/jit/metainterp/optimizeopt/schedule.py @@ -229,6 +229,7 @@ self.output_type = None self.costmodel = None + def determine_input_type(self, op): arg = op.getarg(0) _, vbox = self.sched_data.getvector_of_box(arg) @@ -267,9 +268,13 @@ # self.check_if_pack_supported(pack) # - self.pack = pack - self.transform_pack() - + if self.must_be_full_but_is_not(pack): + for op in pack.operations: + self.preamble_ops.append(op.getoperation()) + else: + self.pack = pack + self.transform_pack() + # self.pack = None self.costmodel = None self.preamble_ops = None @@ -277,6 +282,9 @@ self.input_type = None self.output_type = None + def must_be_full_but_is_not(self, pack): + return False + def split_pack(self, pack, vec_reg_size): """ Returns how many items of the pack should be emitted as vector operation. 
""" @@ -294,11 +302,9 @@ pass def transform_pack(self): - op = self.pack.operations[0].getoperation() + op = self.pack.leftmost() args = op.getarglist() - # self.before_argument_transform(args) - # self.transform_arguments(args) # result = op.result @@ -614,6 +620,7 @@ assert isinstance(sizearg, ConstInt) self.size = sizearg.value + def new_result_vector_box(self): type = self.output_type.gettype() count = self.input_type.getcount() @@ -656,6 +663,11 @@ def determine_input_type(self, op): return PackType.by_descr(op.getdescr(), self.sched_data.vec_reg_size) + def must_be_full_but_is_not(self, pack): + vrs = self.sched_data.vec_reg_size + it = pack.input_type + return it.getsize() * it.getcount() < vrs + def determine_output_type(self, op): return None @@ -833,7 +845,6 @@ ptype = self.input_type if self.input_type is None: # load does not have an input type, but only an output type - assert self.leftmost().is_raw_load() ptype = self.output_type op = self.leftmost() diff --git a/rpython/jit/metainterp/optimizeopt/test/test_costmodel.py b/rpython/jit/metainterp/optimizeopt/test/test_costmodel.py --- a/rpython/jit/metainterp/optimizeopt/test/test_costmodel.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_costmodel.py @@ -136,7 +136,7 @@ savings = self.savings(loop1) assert savings == 2 - @py.test.mark.parametrize("bytes,s", [(1,-1),(2,-1),(4,0),(8,-1)]) + @py.test.mark.parametrize("bytes,s", [(1,None),(2,None),(4,0),(8,-1)]) def test_sum_float_to_int(self, bytes, s): loop1 = self.parse(""" f10 = raw_load(p0, i0, descr=double) @@ -150,13 +150,19 @@ i15 = int_add(i16, i13) i17 = int_signext(i15, {c}) """.format(c=bytes)) - savings = self.savings(loop1) - # it does not benefit because signext has - # a very inefficient implementation (x86 - # does not provide nice instr to convert - # integer sizes) - # signext -> no benefit, + 2x unpack - assert savings <= s + try: + savings = self.savings(loop1) + if s is None: + py.test.fail("must fail") + # it does not benefit 
because signext has + # a very inefficient implementation (x86 + # does not provide nice instr to convert + # integer sizes) + # signext -> no benefit, + 2x unpack + assert savings <= s + except NotAProfitableLoop: + if s is not None: + py.test.fail("must not fail") def test_cast(self): loop1 = self.parse(""" diff --git a/rpython/jit/metainterp/optimizeopt/test/test_schedule.py b/rpython/jit/metainterp/optimizeopt/test/test_schedule.py --- a/rpython/jit/metainterp/optimizeopt/test/test_schedule.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_schedule.py @@ -3,7 +3,8 @@ from rpython.jit.metainterp.history import TargetToken, JitCellToken, TreeLoop from rpython.jit.metainterp.optimizeopt.util import equaloplists, Renamer from rpython.jit.metainterp.optimizeopt.vectorize import (VecScheduleData, - Pack, NotAProfitableLoop, VectorizingOptimizer, X86_CostModel) + Pack, Pair, NotAProfitableLoop, VectorizingOptimizer, X86_CostModel, + PackSet) from rpython.jit.metainterp.optimizeopt.dependency import Node from rpython.jit.metainterp.optimizeopt.schedule import PackType from rpython.jit.metainterp.optimizeopt.test.test_util import LLtypeMixin @@ -14,6 +15,14 @@ from rpython.jit.tool.oparser import parse as opparse from rpython.jit.tool.oparser_model import get_model +F64 = PackType('f',8,True,2) +F32 = PackType('f',4,True,4) +F32_2 = PackType('f',4,True,2) +I64 = PackType('i',8,True,2) +I32 = PackType('i',4,True,4) +I32_2 = PackType('i',4,True,2) +I16 = PackType('i',2,True,8) + class SchedulerBaseTest(DependencyBaseTest): def parse(self, source, inc_label_jump=True, @@ -58,8 +67,8 @@ del loop.operations[-1] return loop - def pack(self, loop, l, r): - return Pack([Node(op,1+l+i) for i,op in enumerate(loop.operations[1+l:1+r])], None, None) + def pack(self, loop, l, r, input_type, output_type): + return Pack([Node(op,1+l+i) for i,op in enumerate(loop.operations[1+l:1+r])], input_type, output_type) def schedule(self, loop_orig, packs, vec_reg_size=16, 
prepend_invariant=False, overwrite_funcs=None): loop = get_model(False).ExtendedTreeLoop("loop") @@ -72,16 +81,32 @@ for name, overwrite in (overwrite_funcs or {}).items(): setattr(vsd, name, overwrite) renamer = Renamer() + metainterp_sd = FakeMetaInterpStaticData(self.cpu) + jitdriver_sd = FakeJitDriverStaticData() + opt = VectorizingOptimizer(metainterp_sd, jitdriver_sd, loop, 0) + pairs = [] for pack in packs: + for i in range(len(pack.operations)-1): + o1 = pack.operations[i] + o2 = pack.operations[i+1] + pairs.append(Pair(o1,o2,pack.input_type,pack.output_type)) + + class FakePackSet(PackSet): + def __init__(self): + self.packs = None + + opt.packset = FakePackSet() + opt.packset.packs = pairs + + opt.combine_packset() + + for pack in opt.packset.packs: if pack.opcount() == 1: ops.append(pack.operations[0].getoperation()) else: for op in vsd.as_vector_operation(pack, renamer): ops.append(op) loop.operations = ops - metainterp_sd = FakeMetaInterpStaticData(self.cpu) - jitdriver_sd = FakeJitDriverStaticData() - opt = VectorizingOptimizer(metainterp_sd, jitdriver_sd, loop, 0) opt.clear_newoperations() for op in ops: opt.unpack_from_vector(op, vsd, renamer) @@ -106,7 +131,7 @@ i14 = raw_load(p0, i4, descr=float) i15 = raw_load(p0, i5, descr=float) """) - pack1 = self.pack(loop1, 0, 6) + pack1 = self.pack(loop1, 0, 6, None, F32) loop2 = self.schedule(loop1, [pack1]) loop3 = self.parse(""" v10[i32|4] = vec_raw_load(p0, i0, 4, descr=float) @@ -123,9 +148,9 @@ f10 = cast_int_to_float(i12) f11 = cast_int_to_float(i13) """) - pack1 = self.pack(loop1, 0, 2) - pack2 = self.pack(loop1, 2, 4) - pack3 = self.pack(loop1, 4, 6) + pack1 = self.pack(loop1, 0, 2, None, I64) + pack2 = self.pack(loop1, 2, 4, I64, I32_2) + pack3 = self.pack(loop1, 4, 6, I32_2, F32_2) loop2 = self.schedule(loop1, [pack1, pack2, pack3]) loop3 = self.parse(""" v10[i64|2] = vec_raw_load(p0, i0, 2, descr=long) @@ -139,7 +164,7 @@ i10 = int_add(i0, 73) i11 = int_add(i1, 73) """) - pack1 = 
self.pack(loop1, 0, 2) + pack1 = self.pack(loop1, 0, 2, I64, I64) loop2 = self.schedule(loop1, [pack1], prepend_invariant=True) loop3 = self.parse(""" v10[i64|2] = vec_box(2) @@ -155,7 +180,7 @@ f10 = float_add(f0, 73.0) f11 = float_add(f1, 73.0) """) - pack1 = self.pack(loop1, 0, 2) + pack1 = self.pack(loop1, 0, 2, I64, I64) loop2 = self.schedule(loop1, [pack1], prepend_invariant=True) loop3 = self.parse(""" v10[f64|2] = vec_box(2) @@ -174,8 +199,8 @@ f12 = float_add(f10, f5) f13 = float_add(f11, f5) """) - pack1 = self.pack(loop1, 0, 2) - pack2 = self.pack(loop1, 2, 4) + pack1 = self.pack(loop1, 0, 2, F64, F64) + pack2 = self.pack(loop1, 2, 4, F64, F64) loop2 = self.schedule(loop1, [pack1, pack2], prepend_invariant=True) loop3 = self.parse(""" v10[f64|2] = vec_box(2) @@ -199,7 +224,7 @@ i10 = int_signext(i1, 4) i11 = int_signext(i1, 4) """, additional_args=['v10[i64|2]']) - pack1 = self.pack(loop1, 0, 2) + pack1 = self.pack(loop1, 0, 2, I64, I32_2) var = self.find_input_arg('v10', loop1) def i1inv103204(v): return 0, var @@ -250,10 +275,11 @@ raw_store(p1, i7, i24, descr=short) raw_store(p1, i8, i25, descr=short) """) - pack1 = self.pack(loop1, 0, 8) - pack2 = self.pack(loop1, 8, 16) - pack3 = self.pack(loop1, 16, 24) - pack4 = self.pack(loop1, 24, 32) + pack1 = self.pack(loop1, 0, 8, None, I64) + pack2 = self.pack(loop1, 8, 16, I64, I32_2) + I16_2 = PackType('i',2,True,2) + pack3 = self.pack(loop1, 16, 24, I32, I16_2) + pack4 = self.pack(loop1, 24, 32, I16, None) def void(b,c): pass loop2 = self.schedule(loop1, [pack1,pack2,pack3,pack4], @@ -297,9 +323,9 @@ raw_store(p1, i3, i12, descr=float) raw_store(p1, i4, i13, descr=float) """) - pack1 = self.pack(loop1, 0, 4) - pack2 = self.pack(loop1, 4, 8) - pack3 = self.pack(loop1, 8, 12) + pack1 = self.pack(loop1, 0, 4, None, I64) + pack2 = self.pack(loop1, 4, 8, I64, I32_2) + pack3 = self.pack(loop1, 8, 12, I32, None) loop2 = self.schedule(loop1, [pack1,pack2,pack3]) loop3 = self.parse(""" v44[f64|2] = 
vec_raw_load(p0, i1, 2, descr=double) @@ -322,9 +348,9 @@ guard_true(i12) [] guard_true(i13) [] """) - pack1 = self.pack(loop1, 0, 2) - pack2 = self.pack(loop1, 2, 4) - pack3 = self.pack(loop1, 4, 6) + pack1 = self.pack(loop1, 0, 2, None, I64) + pack2 = self.pack(loop1, 2, 4, I64, I64) + pack3 = self.pack(loop1, 4, 6, None, I64) loop2 = self.schedule(loop1, [pack1,pack2,pack3], prepend_invariant=True) loop3 = self.parse(""" v9[i64|2] = vec_int_expand(255) @@ -342,8 +368,8 @@ raw_store(p0, i3, i10, descr=float) raw_store(p0, i4, i11, descr=float) """) - pack1 = self.pack(loop1, 0, 2) - pack2 = self.pack(loop1, 2, 4) + pack1 = self.pack(loop1, 0, 2, None, I32_2) + pack2 = self.pack(loop1, 2, 4, I32_2, None) loop2 = self.schedule(loop1, [pack1,pack2], prepend_invariant=True) loop3 = self.parse(""" v1[ui32|2] = vec_raw_load(p0, i1, 2, descr=float) @@ -361,7 +387,7 @@ i10 = int_and(255, i1) i11 = int_and(255, i1) """) - pack1 = self.pack(loop1, 0, 2) + pack1 = self.pack(loop1, 0, 2, I64, I64) loop2 = self.schedule(loop1, [pack1], prepend_invariant=True) loop3 = self.parse(""" v1[i64|2] = vec_int_expand(255) @@ -375,7 +401,7 @@ i10 = int_and(255, i1) i11 = int_and(255, i1) """) - pack1 = self.pack(loop1, 0, 2) + pack1 = self.pack(loop1, 0, 2, I64, I64) loop2 = self.schedule(loop1, [pack1], prepend_invariant=True) loop3 = self.parse(""" v1[i64|2] = vec_int_expand(255) diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py b/rpython/jit/metainterp/optimizeopt/vectorize.py --- a/rpython/jit/metainterp/optimizeopt/vectorize.py +++ b/rpython/jit/metainterp/optimizeopt/vectorize.py @@ -408,6 +408,7 @@ left = pack1.operations[0] if left in remove_left: remove_left[left] = pack1 + pack1.clear() del self.packset.packs[i] end_ij -= 1 continue _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit