Author: Richard Plangger <r...@pasra.at> Branch: vecopt Changeset: r77974:66758cffe3af Date: 2015-06-09 09:34 +0200 http://bitbucket.org/pypy/pypy/changeset/66758cffe3af/
Log: generating vector box for accumulation before the label and renaming occurrences diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py b/rpython/jit/metainterp/optimizeopt/schedule.py --- a/rpython/jit/metainterp/optimizeopt/schedule.py +++ b/rpython/jit/metainterp/optimizeopt/schedule.py @@ -88,6 +88,49 @@ node.clear_dependencies() node.emitted = True +def vectorbox_outof_box(box, count=-1, size=-1, type='-', clone_signed=True, signed=False): + if box.type not in (FLOAT, INT): + raise AssertionError("cannot create vector box of type %s" % (box.type)) + signed = True + if box.type == FLOAT: + signed = False + return BoxVector(box.type, 2, 8, signed) + +def vectorbox_clone_set(box, count=-1, size=-1, type='-', clone_signed=True, signed=False): + if count == -1: + count = box.item_count + if size == -1: + size = box.item_size + if type == '-': + type = box.item_type + if clone_signed: + signed = box.item_signed + return BoxVector(type, count, size, signed) + +def getpackopnum(type): + if type == INT: + return rop.VEC_INT_PACK + elif type == FLOAT: + return rop.VEC_FLOAT_PACK + # + raise AssertionError("getpackopnum type %s not supported" % (type,)) + +def getunpackopnum(type): + if type == INT: + return rop.VEC_INT_UNPACK + elif type == FLOAT: + return rop.VEC_FLOAT_UNPACK + # + raise AssertionError("getunpackopnum type %s not supported" % (type,)) + +def getexpandopnum(type): + if type == INT: + return rop.VEC_INT_EXPAND + elif type == FLOAT: + return rop.VEC_FLOAT_EXPAND + # + raise AssertionError("getexpandopnum type %s not supported" % (type,)) + class PackType(object): UNKNOWN_TYPE = '-' @@ -163,9 +206,6 @@ self.input_type = None self.output_type = None - def clone_vbox_set_count(self, box, count): - return BoxVector(box.item_type, count, box.item_size, box.item_signed) - def is_vector_arg(self, i): if i < 0 or i >= len(self.arg_ptypes): return False @@ -321,10 +361,8 @@ return vbox_cloned def unpack(self, vbox, index, count, arg_ptype): - vbox_cloned 
= self.clone_vbox_set_count(vbox, count) - opnum = rop.VEC_FLOAT_UNPACK - if vbox.item_type == INT: - opnum = rop.VEC_INT_UNPACK + vbox_cloned = vectorbox_clone_set(vbox, count=count) + opnum = getunpackopnum(vbox.item_type) op = ResOperation(opnum, [vbox, ConstInt(index), ConstInt(count)], vbox_cloned) self.preamble_ops.append(op) return vbox_cloned @@ -336,9 +374,7 @@ this function creates a box pack instruction to merge them to: v1/2 = [A,B,X,Y] """ - opnum = rop.VEC_FLOAT_PACK - if tgt_box.item_type == INT: - opnum = rop.VEC_INT_PACK + opnum = getpackopnum(tgt_box.item_type) arg_count = len(args) i = index while i < arg_count and tgt_box.item_count < packable: @@ -348,7 +384,7 @@ i += 1 continue count = tgt_box.item_count + src_box.item_count - new_box = self.clone_vbox_set_count(tgt_box, count) + new_box = vectorbox_clone_set(tgt_box, count=count) op = ResOperation(opnum, [tgt_box, src_box, ConstInt(i), ConstInt(src_box.item_count)], new_box) self.preamble_ops.append(op) @@ -404,9 +440,7 @@ break i += 1 else: - expand_opnum = rop.VEC_FLOAT_EXPAND - if box_type == INT: - expand_opnum = rop.VEC_INT_EXPAND + expand_opnum = getexpandopnum(box_type) op = ResOperation(expand_opnum, [arg], vbox) invariant_ops.append(op) invariant_vars.append(vbox) @@ -415,9 +449,7 @@ op = ResOperation(rop.VEC_BOX, [ConstInt(len(nodes))], vbox) invariant_ops.append(op) - opnum = rop.VEC_FLOAT_PACK - if arg.type == INT: - opnum = rop.VEC_INT_PACK + opnum = getpackopnum(arg.type) for i,node in enumerate(nodes): op = node.getoperation() arg = op.getarg(argidx) diff --git a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py --- a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py @@ -68,7 +68,7 @@ opt.analyse_index_calculations() if opt.dependency_graph is not None: self._write_dot_and_convert_to_svg(opt.dependency_graph, "ee" + self.test_name) - 
opt.schedule() + opt.schedule(False) opt.unroll_loop_iterations(loop, unroll_factor) opt.loop.operations = opt.get_newoperations() self.debug_print_operations(opt.loop) @@ -101,7 +101,7 @@ opt.find_adjacent_memory_refs() opt.extend_packset() opt.combine_packset() - opt.schedule() + opt.schedule(True) return opt def vectorize(self, loop, unroll_factor = -1): @@ -109,7 +109,7 @@ opt.find_adjacent_memory_refs() opt.extend_packset() opt.combine_packset() - opt.schedule() + opt.schedule(True) gso = GuardStrengthenOpt(opt.dependency_graph.index_vars) gso.propagate_all_forward(opt.loop) return opt diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py b/rpython/jit/metainterp/optimizeopt/vectorize.py --- a/rpython/jit/metainterp/optimizeopt/vectorize.py +++ b/rpython/jit/metainterp/optimizeopt/vectorize.py @@ -10,7 +10,8 @@ from rpython.jit.metainterp.optimizeopt.util import make_dispatcher_method, Renamer from rpython.jit.metainterp.optimizeopt.dependency import (DependencyGraph, MemoryRef, Node, IndexVar) -from rpython.jit.metainterp.optimizeopt.schedule import VecScheduleData, Scheduler, Pack, Pair, AccumPair +from rpython.jit.metainterp.optimizeopt.schedule import (VecScheduleData, + Scheduler, Pack, Pair, AccumPair, vectorbox_outof_box, getpackopnum) from rpython.jit.metainterp.optimizeopt.guard import GuardStrengthenOpt from rpython.jit.metainterp.resoperation import (rop, ResOperation, GuardResOp) from rpython.rlib.objectmodel import we_are_translated @@ -83,7 +84,6 @@ self.smallest_type_bytes = 0 self.early_exit_idx = -1 self.sched_data = None - self.tried_to_pack = False self.costmodel = X86_CostModel(cost_threshold) def propagate_all_forward(self, clear=True): @@ -107,7 +107,7 @@ # find index guards and move to the earliest position self.analyse_index_calculations() if self.dependency_graph is not None: - self.schedule() # reorder the trace + self.schedule(False) # reorder the trace # unroll self.unroll_count = self.get_unroll_count(vsize) @@ -122,7 +122,7 
@@ self.combine_packset() if not self.costmodel.profitable(self.packset): raise NotAProfitableLoop() - self.schedule() + self.schedule(True) gso = GuardStrengthenOpt(self.dependency_graph.index_vars) gso.propagate_all_forward(self.loop) @@ -275,8 +275,6 @@ loop = self.loop operations = loop.operations - self.tried_to_pack = True - self.packset = PackSet(self.dependency_graph, operations, self.unroll_count, self.smallest_type_bytes) @@ -356,17 +354,21 @@ if len_before == len(self.packset.packs): break - def schedule(self): + def schedule(self, vector=False): self.guard_early_exit = -1 self.clear_newoperations() sched_data = VecScheduleData(self.metainterp_sd.cpu.vector_register_size) scheduler = Scheduler(self.dependency_graph, sched_data) renamer = Renamer() + # + if vector: + self.packset.accumulate_prepare(sched_data, renamer) + # while scheduler.has_more(): position = len(self._newoperations) ops = scheduler.next(position) for op in ops: - if self.tried_to_pack: + if vector: self.unpack_from_vector(op, sched_data, renamer) self.emit_operation(op) @@ -534,51 +536,6 @@ self.accum_vars[pack.accum_variable] = pack.accum_variable self.packs.append(pack) - def accumulates_pair(self, lnode, rnode, origin_pack): - # lnode and rnode are isomorphic and dependent - assert isinstance(origin_pack, Pair) - lop = lnode.getoperation() - opnum = lop.getopnum() - - if opnum in (rop.FLOAT_ADD, rop.INT_ADD): - roper = rnode.getoperation() - assert lop.numargs() == 2 and lop.result is not None - accum, accum_pos = self.getaccumulator_variable(lop, roper, origin_pack) - if not accum: - return None - # the dependency exists only because of the result of lnode - for dep in lnode.provides(): - if dep.to is rnode: - if not dep.because_of(accum): - # not quite ... 
this is not handlable - return None - # get the original variable - accum = lop.getarg(accum_pos) - - # in either of the two cases the arguments are mixed, - # which is not handled currently - var_pos = (accum_pos + 1) % 2 - plop = origin_pack.left.getoperation() - if lop.getarg(var_pos) is not plop.result: - return None - prop = origin_pack.right.getoperation() - if roper.getarg(var_pos) is not prop.result: - return None - - # this can be handled by accumulation - return AccumPair(lnode, rnode, accum, accum_pos) - - return None - - def getaccumulator_variable(self, lop, rop, origin_pack): - args = rop.getarglist() - for i, arg in enumerate(args): - print arg, "is", lop.result - if arg is lop.result: - return arg, i - # - return None, -1 - def can_be_packed(self, lnode, rnode, origin_pack): if isomorphic(lnode.getoperation(), rnode.getoperation()): if lnode.independent(rnode): @@ -645,3 +602,67 @@ del self.packs[last_pos] return last_pos + def accumulates_pair(self, lnode, rnode, origin_pack): + # lnode and rnode are isomorphic and dependent + assert isinstance(origin_pack, Pair) + lop = lnode.getoperation() + opnum = lop.getopnum() + + if opnum in (rop.FLOAT_ADD, rop.INT_ADD): + roper = rnode.getoperation() + assert lop.numargs() == 2 and lop.result is not None + accum, accum_pos = self.getaccumulator_variable(lop, roper, origin_pack) + if not accum: + return None + # the dependency exists only because of the result of lnode + for dep in lnode.provides(): + if dep.to is rnode: + if not dep.because_of(accum): + # not quite ... 
this is not handlable + return None + # get the original variable + accum = lop.getarg(accum_pos) + + # in either of the two cases the arguments are mixed, + # which is not handled currently + var_pos = (accum_pos + 1) % 2 + plop = origin_pack.left.getoperation() + if lop.getarg(var_pos) is not plop.result: + return None + prop = origin_pack.right.getoperation() + if roper.getarg(var_pos) is not prop.result: + return None + + # this can be handled by accumulation + return AccumPair(lnode, rnode, accum, accum_pos) + + return None + + def getaccumulator_variable(self, lop, rop, origin_pack): + args = rop.getarglist() + for i, arg in enumerate(args): + if arg is lop.result: + return arg, i + # + return None, -1 + + def accumulate_prepare(self, sched_data, renamer): + for var, pos in self.accum_vars.items(): + # create a new vector box for the parameters + box = vectorbox_outof_box(var) + op = ResOperation(rop.VEC_BOX, [ConstInt(0)], box) + sched_data.invariant_oplist.append(op) + result = box.clonebox() + # clear the box to zero + op = ResOperation(rop.VEC_INT_XOR, [box, box], result) + sched_data.invariant_oplist.append(op) + box = result + result = box.clonebox() + # pack the scalar value + op = ResOperation(getpackopnum(box.item_type), + [box, var, ConstInt(0), ConstInt(1)], result) + sched_data.invariant_oplist.append(op) + # rename the variable with the box + renamer.start_renaming(var, result) + + _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit