Author: Richard Plangger <planri...@gmail.com>
Branch: vecopt-merge
Changeset: r79663:b304567d9f23
Date: 2015-09-17 11:29 +0200
http://bitbucket.org/pypy/pypy/changeset/b304567d9f23/
Log: costmodel now working again and ported most part of accum as well diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py b/rpython/jit/metainterp/optimizeopt/schedule.py --- a/rpython/jit/metainterp/optimizeopt/schedule.py +++ b/rpython/jit/metainterp/optimizeopt/schedule.py @@ -1,13 +1,13 @@ from rpython.jit.metainterp.history import (VECTOR, FLOAT, INT, ConstInt, ConstFloat, TargetToken) from rpython.jit.metainterp.resoperation import (rop, ResOperation, - GuardResOp, VecOperation, OpHelpers) + GuardResOp, VecOperation, OpHelpers, VecOperationNew) from rpython.jit.metainterp.optimizeopt.dependency import (DependencyGraph, MemoryRef, Node, IndexVar) from rpython.jit.metainterp.optimizeopt.renamer import Renamer from rpython.rlib.objectmodel import we_are_translated from rpython.jit.metainterp.jitexc import NotAProfitableLoop -from rpython.rlib.objectmodel import specialize +from rpython.rlib.objectmodel import specialize, always_inline class SchedulerState(object): @@ -133,27 +133,52 @@ assert node.emitted class TypeRestrict(object): - ANY_TYPE = -1 + ANY_TYPE = '\x00' ANY_SIZE = -1 ANY_SIGN = -1 ANY_COUNT = -1 SIGNED = 1 UNSIGNED = 0 - def __init__(self, type=-1, bytesize=-1, count=-1, sign=-1): + def __init__(self, + type=ANY_TYPE, + bytesize=ANY_SIZE, + count=ANY_SIGN, + sign=ANY_COUNT): self.type = type self.bytesize = bytesize self.sign = sign self.count = count - def allows(self, type, count): - if self.type != ANY_TYPE: - if self.type != type.type: - return False + @always_inline + def any_size(self): + return self.bytesize == TypeRestrict.ANY_SIZE - # TODO + def check(self, value): + assert value.datatype != '\x00' + if self.type != TypeRestrict.ANY_TYPE: + if self.type != value.datatype: + assert 0, "type mismatch" - return True + assert value.bytesize > 0 + if not self.any_size(): + if self.bytesize != value.bytesize: + assert 0, "size mismatch" + + assert value.count > 0 + if self.count != TypeRestrict.ANY_COUNT: + if self.count != 
value.count: + assert 0, "count mismatch" + + if self.sign != TypeRestrict.ANY_SIGN: + if bool(self.sign) != value.sign: + assert 0, "sign mismatch" + + def max_input_count(self, count): + """ How many """ + if self.count != TypeRestrict.ANY_COUNT: + return self.count + return count class trans(object): @@ -205,32 +230,22 @@ def turn_into_vector(state, pack): """ Turn a pack into a vector instruction """ - # - # TODO self.check_if_pack_supported(pack) - op = pack.leftmost() - args = op.getarglist() + check_if_pack_supported(state, pack) + state.costmodel.record_pack_savings(pack, pack.numops()) + left = pack.leftmost() + args = left.getarglist_copy() prepare_arguments(state, pack, args) - vop = VecOperation(op.vector, args, op, pack.numops(), op.getdescr()) + vecop = VecOperation(left.vector, args, left, + pack.numops(), left.getdescr()) + state.oplist.append(vecop) for i,node in enumerate(pack.operations): op = node.getoperation() - state.setvector_of_box(op,i,vop) - # + state.setvector_of_box(op,i,vecop) if op.is_guard(): assert isinstance(op, GuardResOp) - assert isinstance(vop, GuardResOp) - vop.setfailargs(op.getfailargs()) - vop.rd_snapshot = op.rd_snapshot - state.costmodel.record_pack_savings(pack, pack.numops()) - # - if pack.is_accumulating(): - box = oplist[position].result - assert box is not None - for node in pack.operations: - op = node.getoperation() - assert not op.returns_void() - state.renamer.start_renaming(op, box) - # - state.oplist.append(vop) + assert isinstance(vecop, GuardResOp) + vecop.setfailargs(op.getfailargs()) + vecop.rd_snapshot = op.rd_snapshot def prepare_arguments(state, pack, args): @@ -238,7 +253,9 @@ # The following cases can occur: # 1) argument is present in the box_to_vbox map. 
# a) vector can be reused immediatly (simple case) - # b) an operation forces the unpacking of a vector + # b) the size of the input is mismatching (crop the vector) + # c) values are scattered in differnt registers + # d) the operand is not at the right position in the vector # 2) argument is not known to reside in a vector # a) expand vars/consts before the label and add as argument # b) expand vars created in the loop body @@ -250,24 +267,49 @@ if i >= len(restrictions) or restrictions[i] is None: # ignore this argument continue + restrict = restrictions[i] if arg.returns_vector(): + restrict.check(arg) continue pos, vecop = state.getvector_of_box(arg) if not vecop: # 2) constant/variable expand this box expand(state, pack, args, arg, i) + restrict.check(args[i]) continue + # 1) + args[i] = vecop # a) + assemble_scattered_values(state, pack, args, i) # c) + crop_vector(state, restrict, pack, args, i) # b) + position_values(state, restrict, pack, args, i, pos) # d) + restrict.check(args[i]) + +@always_inline +def crop_vector(state, restrict, pack, args, i): + # convert size i64 -> i32, i32 -> i64, ... 
+ arg = args[i] + newsize, size = restrict.bytesize, arg.bytesize + if not restrict.any_size() and newsize != size: + assert arg.type == 'i' + state._prevent_signext(newsize, size) + count = arg.count + vecop = VecOperationNew(rop.VEC_INT_SIGNEXT, [arg, ConstInt(newsize)], + 'i', newsize, arg.signed, count) + state.oplist.append(vecop) + state.costmodel.record_cast_int(size, newsize, count) args[i] = vecop - assemble_scattered_values(state, pack, args, i) - position_values(state, pack, args, i, pos) +@always_inline def assemble_scattered_values(state, pack, args, index): - vectors = pack.argument_vectors(state, pack, index) + args_at_index = [node.getoperation().getarg(index) for node in pack.operations] + args_at_index[0] = args[index] + vectors = pack.argument_vectors(state, pack, index, args_at_index) if len(vectors) > 1: # the argument is scattered along different vector boxes args[index] = gather(state, vectors, pack.numops()) state.remember_args_in_vector(pack, index, args[index]) +@always_inline def gather(state, vectors, count): # packed < packable and packed < stride: (_, arg) = vectors[0] i = 1 @@ -278,39 +320,32 @@ i += 1 return arg -def position_values(state, pack, args, index, position): +@always_inline +def position_values(state, restrict, pack, args, index, position): if position != 0: # The vector box is at a position != 0 but it # is required to be at position 0. Unpack it! arg = args[index] - args[index] = unpack_from_vector(state, arg, position, arg.count - position) + count = restrict.max_input_count(arg.count) + args[index] = unpack_from_vector(state, arg, position, count) state.remember_args_in_vector(pack, index, args[index]) - # convert size i64 -> i32, i32 -> i64, ... 
- # TODO if self.bytesize > 0: - # determine_trans( - # self.input_type.getsize() != vecop.getsize(): - # vecop = self.extend(vecop, self.input_type) - -def check_if_pack_supported(self, pack): - op0 = pack.operations[0].getoperation() - if self.input_type is None: - # must be a load/guard op - return - insize = self.input_type.getsize() - if op0.is_typecast(): +def check_if_pack_supported(state, pack): + left = pack.leftmost() + insize = left.bytesize + if left.is_typecast(): # prohibit the packing of signext calls that # cast to int16/int8. - _, outsize = op0.cast_to() - self.sched_data._prevent_signext(outsize, insize) - if op0.getopnum() == rop.INT_MUL: + state._prevent_signext(left.cast_to_bytesize(), + left.cast_from_bytesize()) + if left.getopnum() == rop.INT_MUL: if insize == 8 or insize == 1: # see assembler for comment why raise NotAProfitableLoop def unpack_from_vector(state, arg, index, count): """ Extract parts of the vector box into another vector box """ - print "unpack i", index, "c", count, "v", arg + #print "unpack i", index, "c", count, "v", arg assert count > 0 assert index + count <= arg.count args = [arg, ConstInt(index), ConstInt(count)] @@ -702,12 +737,12 @@ vector register. 
""" before_count = len(packlist) - print "splitting pack", self + #print "splitting pack", self pack = self while pack.pack_load(vec_reg_size) > Pack.FULL: pack.clear() oplist, newoplist = pack.slice_operations(vec_reg_size) - print " split of %dx, left: %d" % (len(oplist), len(newoplist)) + #print " split of %dx, left: %d" % (len(oplist), len(newoplist)) pack.operations = oplist pack.update_pack_of_nodes() if not pack.leftmost().is_typecast(): @@ -723,7 +758,7 @@ newpack.clear() newpack.operations = [] break - print " => %dx packs out of %d operations" % (-before_count + len(packlist) + 1, sum([pack.numops() for pack in packlist[before_count:]])) + #print " => %dx packs out of %d operations" % (-before_count + len(packlist) + 1, sum([pack.numops() for pack in packlist[before_count:]])) pack.update_pack_of_nodes() def slice_operations(self, vec_reg_size): @@ -749,11 +784,10 @@ accum = False return rightmost is leftmost and accum - def argument_vectors(self, state, pack, index): - args = [node.getoperation().getarg(index) for node in pack.operations] + def argument_vectors(self, state, pack, index, pack_args_index): vectors = [] last = None - for arg in args: + for arg in pack_args_index: pos, vecop = state.getvector_of_box(arg) if vecop is not last and vecop is not None: vectors.append((pos, vecop)) @@ -792,23 +826,3 @@ assert isinstance(right, Node) Pair.__init__(self, left, right) self.accum = accum - -#def extend(self, vbox, newtype): -# assert vbox.gettype() == newtype.gettype() -# if vbox.gettype() == INT: -# return self.extend_int(vbox, newtype) -# else: -# raise NotImplementedError("cannot yet extend float") -# -#def extend_int(self, vbox, newtype): -# vbox_cloned = newtype.new_vector_box(vbox.getcount()) -# self.sched_data._prevent_signext(newtype.getsize(), vbox.getsize()) -# newsize = newtype.getsize() -# assert newsize > 0 -# op = ResOperation(rop.VEC_INT_SIGNEXT, -# [vbox, ConstInt(newsize)], -# vbox_cloned) -# 
self.costmodel.record_cast_int(vbox.getsize(), newtype.getsize(), vbox.getcount()) -# self.vecops.append(op) -# return vbox_cloned - diff --git a/rpython/jit/metainterp/optimizeopt/test/test_costmodel.py b/rpython/jit/metainterp/optimizeopt/test/test_costmodel.py --- a/rpython/jit/metainterp/optimizeopt/test/test_costmodel.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_costmodel.py @@ -141,7 +141,7 @@ savings = self.savings(loop1) assert savings == 2 - @py.test.mark.parametrize("bytes,s", [(1,None),(2,None),(4,0),(8,0)]) + @py.test.mark.parametrize("bytes,s", [(1,0),(2,0),(4,0),(8,0)]) def test_sum_float_to_int(self, bytes, s): loop1 = self.parse_trace(""" f10 = raw_load_f(p0, i0, descr=double) @@ -200,5 +200,16 @@ except NotAProfitableLoop: pass + def test_force_long_to_int_cast(self): + trace = self.parse_trace(""" + i10 = raw_load_i(p0, i1, descr=long) + i11 = raw_load_i(p0, i2, descr=long) + f10 = cast_int_to_float(i10) + f11 = cast_int_to_float(i11) + """) + number = self.savings(trace) + assert number == 1 + + class Test(CostModelBaseTest, LLtypeMixin): pass diff --git a/rpython/jit/metainterp/optimizeopt/vector.py b/rpython/jit/metainterp/optimizeopt/vector.py --- a/rpython/jit/metainterp/optimizeopt/vector.py +++ b/rpython/jit/metainterp/optimizeopt/vector.py @@ -23,7 +23,8 @@ from rpython.jit.metainterp.optimizeopt.schedule import (VecScheduleState, Scheduler, Pack, Pair, AccumPair) from rpython.jit.metainterp.optimizeopt.guard import GuardStrengthenOpt -from rpython.jit.metainterp.resoperation import (rop, ResOperation, GuardResOp, Accum) +from rpython.jit.metainterp.resoperation import (rop, ResOperation, GuardResOp, + Accum, OpHelpers, VecOperation) from rpython.rlib import listsort from rpython.rlib.objectmodel import we_are_translated from rpython.rlib.debug import debug_print, debug_start, debug_stop @@ -643,8 +644,10 @@ def record_cast_int(self, fromsize, tosize, count): # for each move there is 1 instruction - self.savings += -count - print 
"$$$ cast", -count, "now", self.savings + if fromsize == 8 and tosize == 4 and count == 2: + self.savings -= 1 + else: + self.savings += -count def record_vector_pack(self, src, index, count): if src.datatype == FLOAT: @@ -700,7 +703,6 @@ if self.profitable_pack(lnode, rnode, origin_pack, forward): return Pair(lnode, rnode) else: - print "dependent" if self.contains_pair(lnode, rnode): return None if origin_pack is not None: @@ -787,7 +789,7 @@ size = INT_WORD if left.type == 'f': size = FLOAT_WORD - if left.bytesize == right.bytesize and left.bytesize == size: + if not (left.bytesize == right.bytesize and left.bytesize == size): # do not support if if the type size is smaller # than the cpu word size. # WHY? @@ -811,35 +813,34 @@ for pack in self.packs: if not pack.is_accumulating(): continue - xxx accum = pack.accum - # create a new vector box for the parameters - box = pack.input_type.new_vector_box() - size = vec_reg_size // pack.input_type.getsize() + datatype = accum.getdatatype() + bytesize = accum.getbytesize() + count = vec_reg_size // bytesize + signed = datatype == 'i' + oplist = state.invariant_oplist # reset the box to zeros or ones if accum.operator == Accum.PLUS: - op = ResOperation(rop.VEC_BOX, [ConstInt(size)], box) - state.invariant_oplist.append(op) - result = box.clonebox() - op = ResOperation(rop.VEC_INT_XOR, [box, box], result) - state.invariant_oplist.append(op) - box = result + vecop = OpHelpers.create_vec(datatype, bytesize, signed) + oplist.append(vecop) + vecop = VecOperation(rop.VEC_INT_XOR, [vecop, vecop], + vecop, count) + oplist.append(vecop) elif accum.operator == Accum.MULTIPLY: # multiply is only supported by floats - op = ResOperation(rop.VEC_FLOAT_EXPAND, [ConstFloat(1.0), ConstInt(size)], box) - state.invariant_oplist.append(op) + vecop = OpHelpers.create_vec_expand(ConstFloat(1.0), bytesize, + signed, count) + oplist.append(vecop) else: - raise NotImplementedError("can only handle %s" % accum.operator) - result = box.clonebox() 
- assert isinstance(result, BoxVector) - result.accum = accum + raise NotImplementedError("cannot handle %s" % accum.operator) # pack the scalar value - op = ResOperation(getpackopnum(box.gettype()), - [box, accum.var, ConstInt(0), ConstInt(1)], result) - state.invariant_oplist.append(op) + args = [vecop, accum.getseed(), ConstInt(0), ConstInt(1)] + vecop = OpHelpers.create_vec_pack(datatype, args, bytesize, + signed, count) + oplist.append(vecop) # rename the variable with the box - state.setvector_of_box(accum.getoriginalbox(), 0, result) # prevent it from expansion - state.renamer.start_renaming(accum.getoriginalbox(), result) + state.setvector_of_box(accum.getseed(), 0, vecop) # prevent it from expansion + state.renamer.start_renaming(accum.getseed(), vecop) def split_overloaded_packs(self): newpacks = [] diff --git a/rpython/jit/metainterp/resoperation.py b/rpython/jit/metainterp/resoperation.py --- a/rpython/jit/metainterp/resoperation.py +++ b/rpython/jit/metainterp/resoperation.py @@ -637,15 +637,16 @@ if opnum == rop.FLOAT_MUL: self.operator = Accum.MULTIPLY - def getoriginalbox(self): + def getdatatype(self): + return self.var.datatype + + def getbytesize(self): + return self.var.bytesize + + def getseed(self): + """ The variable holding the seed value """ return self.var - def getop(self): - return self.operator - - def accumulates_value(self): - return True - class CastOp(object): _mixin_ = True @@ -653,7 +654,7 @@ return True def cast_to(self): - _, _, to_type, size = self.casts + to_type, size = self.casts[2], self.casts[3] if self.casts[3] == 0: if self.getopnum() == rop.INT_SIGNEXT: from rpython.jit.metainterp.history import ConstInt _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit