Author: Richard Plangger <r...@pasra.at> Branch: vecopt Changeset: r78322:7419dfb817a7 Date: 2015-06-26 14:48 +0200 http://bitbucket.org/pypy/pypy/changeset/7419dfb817a7/
Log: trying to make things easier; the complexity gets hard to manage when extending the accumulation. trying to prevent the splitting from entering the scheduling (work in progress) diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py b/rpython/jit/metainterp/optimizeopt/schedule.py --- a/rpython/jit/metainterp/optimizeopt/schedule.py +++ b/rpython/jit/metainterp/optimizeopt/schedule.py @@ -188,6 +188,9 @@ def new_vector_box(self, count = -1): if count == -1: count = self.count + assert count > 1 + assert self.type in ('i','f') + assert self.size > 0 return BoxVector(self.type, count, self.size, self.signed) def __repr__(self): @@ -291,29 +294,29 @@ pass def transform_pack(self): - self.off = 0 - while self.off < self.pack.opcount(): - op = self.pack.operations[self.off].getoperation() - args = op.getarglist() - # - self.before_argument_transform(args) - # - argument_infos = [] - self.transform_arguments(args, argument_infos) - # - result = op.result - result = self.transform_result(result) - # - vop = ResOperation(op.vector, args, result, op.getdescr()) - if op.is_guard(): - assert isinstance(op, GuardResOp) - vop.setfailargs(op.getfailargs()) - vop.rd_snapshot = op.rd_snapshot - self.preamble_ops.append(vop) - stride = self.consumed_operations(argument_infos, result) - self.costmodel.record_pack_savings(self.pack, stride) - assert stride != 0 - self.off += stride + #self.off = 0 + #while self.off < self.pack.opcount(): + op = self.pack.operations[0].getoperation() + args = op.getarglist() + # + self.before_argument_transform(args) + # + argument_infos = [] + self.transform_arguments(args, argument_infos) + # + result = op.result + result = self.transform_result(result) + # + vop = ResOperation(op.vector, args, result, op.getdescr()) + if op.is_guard(): + assert isinstance(op, GuardResOp) + vop.setfailargs(op.getfailargs()) + vop.rd_snapshot = op.rd_snapshot + self.preamble_ops.append(vop) + #stride = self.consumed_operations(argument_infos, result) + 
self.costmodel.record_pack_savings(self.pack, self.pack.opcount()) + #assert stride != 0 + #self.off += stride def consumed_operations(self, argument_infos, result): ops = self.getoperations() @@ -348,7 +351,7 @@ return BoxVector(type, count, size, signed) def getoperations(self): - return self.pack.operations[self.off:] + return self.pack.operations def transform_arguments(self, args, argument_info): for i,arg in enumerate(args): @@ -406,16 +409,14 @@ argument_info.append(args[i].item_count) def gather(self, vboxes, target_count): # packed < packable and packed < stride: - i = 0 (_, box) = vboxes[0] + i = 1 while i < len(vboxes): - if i+1 >= len(vboxes): - break - (box2_pos, box2) = vboxes[i+1] + (box2_pos, box2) = vboxes[i] if box.getcount() + box2.getcount() <= target_count: box = self.package(box, box.getcount(), box2, box2_pos, box2.getcount()) - i += 2 + i += 1 return box pass # OLD @@ -453,8 +454,10 @@ def extend_int(self, vbox, newtype): vbox_cloned = newtype.new_vector_box(vbox.item_count) self.sched_data._prevent_signext(newtype.getsize(), vbox.getsize()) + newsize = newtype.getsize() + assert newsize > 0 op = ResOperation(rop.VEC_INT_SIGNEXT, - [vbox, ConstInt(newtype.getsize())], + [vbox, ConstInt(newsize)], vbox_cloned) self.costmodel.record_cast_int(vbox.getsize(), newtype.getsize(), vbox.getcount()) self.preamble_ops.append(op) @@ -618,6 +621,9 @@ if count * size > vec_reg_size: count = vec_reg_size // size signed = self.output_type.signed + assert type in ('i','f') + assert size > 0 + assert count > 1 return BoxVector(type, count, size, signed) class SignExtToVectorOp(OpToVectorOp): @@ -625,15 +631,10 @@ OpToVectorOp.__init__(self, intype, outtype) self.size = -1 - def split_pack(self, pack, vec_reg_size): - op0 = pack.operations[0].getoperation() - sizearg = op0.getarg(1) + def before_argument_transform(self, args): + sizearg = args[1] assert isinstance(sizearg, ConstInt) self.size = sizearg.value - _, vbox = 
self.sched_data.getvector_of_box(op0.getarg(0)) - if vbox.getcount() * self.size > vec_reg_size: - return vec_reg_size // self.size - return vbox.getcount() def new_result_vector_box(self): type = self.output_type.gettype() @@ -642,6 +643,9 @@ if count * self.size > vec_reg_size: count = vec_reg_size // self.size signed = self.input_type.signed + assert type in ('i','f') + assert self.size > 0 + assert count > 1 return BoxVector(type, count, self.size, signed) class LoadToVectorLoad(OpToVectorOp): @@ -655,18 +659,12 @@ return PackType.by_descr(op.getdescr(), self.sched_data.vec_reg_size) def before_argument_transform(self, args): - args.append(ConstInt(len(self.pack.operations))) + count = min(self.output_type.getcount(), len(self.getoperations())) + args.append(ConstInt(count)) def getscalarsize(self): return self.output_type.getsize() - def new_result_vector_box(self): - type = self.output_type.gettype() - size = self.output_type.getsize() - count = len(self.pack.operations) - signed = self.output_type.signed - return BoxVector(type, count, size, signed) - class StoreToVectorStore(OpToVectorOp): """ Storing operations are special because they are not allowed @@ -846,6 +844,28 @@ def opcount(self): return len(self.operations) + def process_count(self): + return len(self.operations) + + def is_full(self, vec_reg_size): + """ if one input element times the opcount is equal + to the vector register size, we are full! 
+ """ + ptype = self.input_type + if self.input_type is None: + # load does not have an input type, but only an output type + assert self.operations[0].getoperation().is_raw_load() + ptype = self.output_type + bytes = ptype.getsize() * self.process_count() + assert bytes <= vec_reg_size + if bytes == vec_reg_size: + return True + if ptype.getcount() != -1: + size = ptype.getcount() * ptype.getsize() + assert bytes <= size + return bytes == size + return False + def opnum(self): assert len(self.operations) > 0 return self.operations[0].getoperation().getopnum() diff --git a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py --- a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py @@ -747,8 +747,8 @@ self.assert_packset_empty(vopt.packset, len(loop.operations), [(6,12), (5,11), (7,13)]) - @pytest.mark.parametrize("descr", ['char','float','int','singlefloat']) - def test_packset_combine_simple(self,descr): + @pytest.mark.parametrize("descr,size", [('char',16),('float',2),('int',2),('singlefloat',4)]) + def test_packset_combine_simple(self,descr,size): ops = """ [p0,i0] i3 = getarrayitem_raw(p0, i0, descr={descr}arraydescr) @@ -758,18 +758,7 @@ loop = self.parse_loop(ops) vopt = self.combine_packset(loop,3) assert len(vopt.dependency_graph.memory_refs) == 4 - assert len(vopt.packset.packs) == 1 - self.assert_pack(vopt.packset.packs[0], (1,3,5,7)) - ops = """ - [p0,i0] - i3 = getarrayitem_raw(p0, i0, descr={descr}arraydescr) - i1 = int_add(i0,1) - jump(p0,i1) - """.format(descr=descr) - loop = self.parse_loop(ops) - vopt = self.combine_packset(loop,3) - assert len(vopt.dependency_graph.memory_refs) == 4 - assert len(vopt.packset.packs) == 1 + assert len(vopt.packset.packs) == 16 // size self.assert_pack(vopt.packset.packs[0], (1,3,5,7)) @pytest.mark.parametrize("descr,stride", @@ -786,7 +775,7 @@ loop = self.parse_loop(ops) vopt = 
self.combine_packset(loop,3) assert len(vopt.dependency_graph.memory_refs) == 8 - assert len(vopt.packset.packs) == 1 + assert len(vopt.packset.packs) == (16//stride) * 2 self.assert_pack(vopt.packset.packs[0], (1,3,5,7,9,11,13,15)) def test_packset_combine_2_loads_one_redundant(self): diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py b/rpython/jit/metainterp/optimizeopt/vectorize.py --- a/rpython/jit/metainterp/optimizeopt/vectorize.py +++ b/rpython/jit/metainterp/optimizeopt/vectorize.py @@ -24,6 +24,7 @@ getunpackopnum, PackType, determine_output_type, determine_trans) from rpython.jit.metainterp.optimizeopt.guard import GuardStrengthenOpt from rpython.jit.metainterp.resoperation import (rop, ResOperation, GuardResOp) +from rpython.rlib import listsort from rpython.rlib.objectmodel import we_are_translated from rpython.rlib.debug import debug_print, debug_start, debug_stop from rpython.rlib.jit import Counters @@ -94,6 +95,9 @@ else: raise +def cmp_pack_lt(a,b): + return a.left.getindex() < b.left.getindex() +packsort = listsort.make_timsort_class(lt=cmp_pack_lt) class VectorizingOptimizer(Optimizer): """ Try to unroll the loop and find instructions to group """ @@ -327,10 +331,13 @@ pack_count = self.packset.pack_count() while True: for pack in self.packset.packs: - self.follow_use_defs(pack) self.follow_def_uses(pack) if pack_count == self.packset.pack_count(): - break + pack_count = self.packset.pack_count() + for pack in self.packset.packs: + self.follow_use_defs(pack) + if pack_count == self.packset.pack_count(): + break pack_count = self.packset.pack_count() def follow_use_defs(self, pack): @@ -371,6 +378,7 @@ raise NotAVectorizeableLoop() i = 0 j = 0 + packsort(self.packset.packs) end_ij = len(self.packset.packs) while True: len_before = len(self.packset.packs) @@ -381,6 +389,8 @@ j += 1 continue pack1 = self.packset.packs[i] + if pack1.is_full(self.cpu.vector_register_size): + break pack2 = self.packset.packs[j] if 
pack1.rightmost_match_leftmost(pack2): end_ij = self.packset.combine(i,j) _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit