Author: Richard Plangger <planri...@gmail.com> Branch: vecopt-merge Changeset: r79593:cd37e9273b1b Date: 2015-09-11 18:13 +0200 http://bitbucket.org/pypy/pypy/changeset/cd37e9273b1b/
Log: work in progress, adapting the transformation from pack -> vecop diff --git a/rpython/jit/metainterp/optimizeopt/dependency.py b/rpython/jit/metainterp/optimizeopt/dependency.py --- a/rpython/jit/metainterp/optimizeopt/dependency.py +++ b/rpython/jit/metainterp/optimizeopt/dependency.py @@ -599,16 +599,6 @@ for guard_node in self.guards: self.build_guard_dependencies(guard_node, tracker) - def prepare_for_scheduling(self): - jump_node = self.nodes[len(self.nodes)-1] - jump_node.emitted = True - label_node = self.nodes[0] - for node in self.nodes: - if node.depends_count() == 0: - self.schedulable_nodes.insert(0, node) - if not we_are_translated(): - assert self.schedulable_nodes[-1] == label_node - def guard_argument_protection(self, guard_node, tracker): """ the parameters the guard protects are an indicator for dependencies. Consider the example: diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py b/rpython/jit/metainterp/optimizeopt/schedule.py --- a/rpython/jit/metainterp/optimizeopt/schedule.py +++ b/rpython/jit/metainterp/optimizeopt/schedule.py @@ -1,6 +1,7 @@ from rpython.jit.metainterp.history import (VECTOR, FLOAT, INT, ConstInt, ConstFloat, TargetToken) -from rpython.jit.metainterp.resoperation import (rop, ResOperation, GuardResOp) +from rpython.jit.metainterp.resoperation import (rop, ResOperation, + GuardResOp, VecOperation) from rpython.jit.metainterp.optimizeopt.dependency import (DependencyGraph, MemoryRef, Node, IndexVar) from rpython.jit.metainterp.optimizeopt.renamer import Renamer @@ -16,7 +17,8 @@ self.worklist = [] def post_schedule(self): - pass + loop = self.graph.loop + self.renamer.rename(loop.label.getoperation()) def profitable(self): return self.costmodel.profitable() @@ -24,15 +26,19 @@ def prepare(self): pass + def delay(self): + return False + def has_more(self): return len(self.worklist) > 0 class Scheduler(object): - """ The base class to be instantiated to (re)schedule a vector trace. """ + """ Create an instance of this class to (re)schedule a vector trace. """ def __init__(self): pass def next(self, state): + """ select the next candidate node to be emitted, or None """ worklist = state.worklist visited = 0 while len(worklist) > 0: @@ -41,79 +47,66 @@ node = worklist.pop() if node.emitted: continue - if self.schedulable(node): + if not self.delay(node, state): return node worklist.insert(0, node) visited += 1 return None - def schedulable(self, candidate): - """ Is the candidate scheduleable? Boils down to dependency_count == 0 - """ - if candidate.pack: - pack = candidate.pack - if pack.is_accumulating(): - for node in pack.operations: - for dep in node.depends(): - if dep.to.pack is not pack: - return False - return True - else: - for node in candidate.pack.operations: - if node.depends_count() > 0: - return False - return candidate.depends_count() == 0 + def delay(self, node, state): + """ Delay this operation? + Only if any dependency has not been resolved """ + if state.delay(node): + return True + return node.depends_count() != 0 - def scheduled(self, node, state): - """ Call this function if an operation has been emitted - adds new operations to the schedule list if - their dependency count drops to zero. - In addition it keeps the list sorted (see priority) - """ + def mark_emitted(self, node, state): + """ An operation has been emitted, adds new operations to the worklist + whenever their dependency count drops to zero. + Keeps worklist sorted (see priority) """ + op = node.getoperation() state.renamer.rename(op) state.unpack_from_vector(op, self) - node.position = len(self.oplist) + node.position = len(state.oplist) + worklist = state.worklist for dep in node.provides()[:]: # COPY to = dep.to node.remove_edge_to(to) - nodes = self.schedulable_nodes if not to.emitted and to.depends_count() == 0: # sorts them by priority - i = len(nodes)-1 + i = len(worklist)-1 while i >= 0: - itnode = nodes[i] + itnode = worklist[i] c = (itnode.priority - to.priority) if c < 0: # meaning itnode.priority < to.priority: - nodes.insert(i+1, to) + worklist.insert(i+1, to) break elif c == 0: # if they have the same priority, sort them # using the original position in the trace if itnode.getindex() < to.getindex(): - nodes.insert(i, to) + worklist.insert(i, to) break i -= 1 else: - nodes.insert(0, to) + worklist.insert(0, to) node.clear_dependencies() node.emitted = True def walk_and_emit(self, state): # TODO oplist, renamer, unpack=False): """ Emit all the operations into the oplist parameter. - Initiates the scheduling. - """ + Initiates the scheduling. """ assert isinstance(state, SchedulerState) while state.has_more(): node = self.next(state) if node: - if not state.emit(node): + if not state.emit(node, self): if not node.emitted: op = node.getoperation() - scheduler.scheduled(node, state) + self.mark_emitted(node, state) + state.oplist.append(op) continue - - # it happens that packs can emit many nodes that have been # added to the scheuldable_nodes list, in this case it could # be that no next exists even though the list contains elements @@ -122,13 +115,9 @@ raise AssertionError("schedule failed cannot continue. possible reason: cycle") - # TODO - #jump_node = self.graph.nodes[-1] - #jump_op = jump_node.getoperation() - #renamer.rename(jump_op) - #assert jump_op.getopnum() == rop.JUMP - #self.sched_data.unpack_from_vector(jump_op, self) - #oplist.append(jump_op) + if not we_are_translated(): + for node in state.graph.nodes: + assert node.emitted def vectorbox_outof_box(box, count=-1, size=-1, type='-'): if box.type not in (FLOAT, INT): @@ -140,12 +129,12 @@ def packtype_outof_box(box): if box.type == VECTOR: - return PackType.of(box) + return Type.of(box) else: if box.type == INT: - return PackType(INT, 8, True, 2) + return Type(INT, 8, True, 2) elif box.type == FLOAT: - return PackType(FLOAT, 8, False, 2) + return Type(FLOAT, 8, False, 2) # raise AssertionError("box %s not supported" % (box,)) @@ -184,121 +173,230 @@ # raise AssertionError("getexpandopnum type %s not supported" % (type,)) -class PackType(object): - """ Represents the type of an operation (either it's input or - output). - """ - UNKNOWN_TYPE = '-' +UNSIGNED_OPS = (rop.UINT_FLOORDIV, rop.UINT_RSHIFT, + rop.UINT_LT, rop.UINT_LE, + rop.UINT_GT, rop.UINT_GE) +class Type(object): + """ The type of one operation. Saves type, size and sign. """ @staticmethod - def of(box, count=-1): - assert box.type == 'V' - if count == -1: - count = box.getcount() - return PackType(box.gettype(), box.getsize(), box.getsigned(), count) + def of(op): + descr = op.getdescr() + if descr: + type = INT + if descr.is_array_of_floats() or descr.concrete_type == FLOAT: + type = FLOAT + size = descr.get_item_size_in_bytes() + sign = descr.is_item_signed() + return Type(type, size, sign) + else: + size = 8 + sign = True + if op.type == 'f' or op.getopnum() in UNSIGNED_OPS: + sign = False + return Type(op.type, size, sign) - @staticmethod - def by_descr(descr, vec_reg_size): - _t = INT - signed = descr.is_item_signed() - if descr.is_array_of_floats() or descr.concrete_type == FLOAT: - _t = FLOAT - signed = False - size = descr.get_item_size_in_bytes() - pt = PackType(_t, size, signed, vec_reg_size // size) - return pt - - def __init__(self, type, size, signed, count=-1): - assert type in (FLOAT, INT, PackType.UNKNOWN_TYPE) + def __init__(self, type, size, signed): + assert type in (FLOAT, INT) self.type = type self.size = size self.signed = signed + + def clone(self): + return Type(self.type, self.size, self.signed) + + def __repr__(self): + sign = '-' + if not self.signed: + sign = '+' + return 'Type(%s%s, %d)' % (sign, self.type, self.size) + + #UNKNOWN_TYPE = '-' + + #@staticmethod + #def of(box, count=-1): + # assert box.type == 'V' + # if count == -1: + # count = box.getcount() + # return Type(box.gettype(), box.getsize(), box.getsigned(), count) + + #@staticmethod + #def by_descr(descr, vec_reg_size): + # _t = INT + # signed = descr.is_item_signed() + # if descr.is_array_of_floats() or descr.concrete_type == FLOAT: + # _t = FLOAT + # signed = False + # size = descr.get_item_size_in_bytes() + # pt = Type(_t, size, signed, vec_reg_size // size) + # return pt + + #def clone(self): + # return Type(self.type, self.size, self.signed, self.count) + + #def new_vector_box(self, count = -1): + # if count == -1: + # count = self.count + # assert count > 1 + # assert self.type in ('i','f') + # assert self.size > 0 + # xxx + # return BoxVector(self.type, count, self.size, self.signed) + + #def combine(self, other): + # """ nothing to be done here """ + # if not we_are_translated(): + # assert self.type == other.type + # assert self.signed == other.signed + + + #def byte_size(self): + # return self.count * self.size + + #def setsize(self, size): + # self.size = size + + #def setcount(self, count): + # self.count = count + + #def gettype(self): + # return self.type + + #def getsize(self): + # return self.size + + #def getcount(self): + # return self.count + + #def pack_byte_size(self, pack): + # if len(pack.operations) == 0: + # return 0 + # return self.getsize() * pack.opcount() + +class TypeRestrict(object): + ANY_TYPE = -1 + ANY_SIZE = -1 + ANY_SIGN = -1 + ANY_COUNT = -1 + SIGNED = 1 + UNSIGNED = 0 + + def __init__(self, type=-1, bytesize=-1, count=-1, sign=-1): + self.type = type + self.bytesize = bytesize + self.sign = sign self.count = count - def clone(self): - return PackType(self.type, self.size, self.signed, self.count) + def allows(self, type, count): + if self.type != ANY_TYPE: + if self.type != type.type: + return False - def new_vector_box(self, count = -1): - if count == -1: - count = self.count - assert count > 1 - assert self.type in ('i','f') - assert self.size > 0 - xxx - return BoxVector(self.type, count, self.size, self.signed) + # TODO - def combine(self, other): - """ nothing to be done here """ - if not we_are_translated(): - assert self.type == other.type - assert self.signed == other.signed + return True - def __repr__(self): - return 'PackType(%s, %d, %d, #%d)' % (self.type, self.size, self.signed, self.count) - - def byte_size(self): - return self.count * self.size - - def setsize(self, size): - self.size = size - - def setcount(self, count): +class TypeOutput(object): + def __init__(self, type, count): + self.type = type self.count = count - def gettype(self): - return self.type - - def getsize(self): - return self.size - - def getcount(self): - return self.count - - def pack_byte_size(self, pack): - if len(pack.operations) == 0: - return 0 - return self.getsize() * pack.opcount() - - -PT_GENERIC = PackType(PackType.UNKNOWN_TYPE, -1, False) -PT_FLOAT_2 = PackType(FLOAT, 4, False, 2) -PT_DOUBLE_2 = PackType(FLOAT, 8, False, 2) -PT_FLOAT_GENERIC = PackType(INT, -1, False) -PT_INT64 = PackType(INT, 8, True) -PT_INT32_2 = PackType(INT, 4, True, 2) -PT_INT_GENERIC = PackType(INT, -1, True) - -INT_RES = PT_INT_GENERIC -FLOAT_RES = PT_FLOAT_GENERIC +class PassFirstArg(TypeOutput): + def __init__(self): + pass class OpToVectorOp(object): - def __init__(self, arg_ptypes, result_ptype): - self.arg_ptypes = [a for a in arg_ptypes] # do not use a tuple. rpython cannot union - self.result_ptype = result_ptype - self.vecops = None - self.sched_data = None - self.pack = None - self.input_type = None - self.output_type = None - self.costmodel = None + def __init__(self, restrictargs, typeoutput): + self.args = list(restrictargs) # do not use a tuple. rpython cannot union + self.out = typeoutput - def as_vector_operation(self, pack, sched_data, scheduler, oplist): - self.sched_data = sched_data - self.vecops = oplist - self.costmodel = sched_data.costmodel - self.input_type = pack.input_type - self.output_type = pack.output_type + def as_vector_operation(self, state, pack): # - self.check_if_pack_supported(pack) - self.pack = pack - self.transform_pack() + # TODO self.check_if_pack_supported(pack) + op = pack.leftmost() + args = op.getarglist() + self.prepare_arguments(state, op.getarglist()) # - self.pack = None - self.costmodel = None - self.vecops = None - self.sched_data = None - self.input_type = None - self.output_type = None + vop = VecOperation(op.vector, args, otype. op.getdescr()) + #result = self.transform_result(op) + # + if op.is_guard(): + assert isinstance(op, GuardResOp) + assert isinstance(vop, GuardResOp) + vop.setfailargs(op.getfailargs()) + vop.rd_snapshot = op.rd_snapshot + self.vecops.append(vop) + self.costmodel.record_pack_savings(self.pack, self.pack.opcount()) + # + if pack.is_accumulating(): + box = oplist[position].result + assert box is not None + for node in pack.operations: + op = node.getoperation() + assert not op.returns_void() + scheduler.renamer.start_renaming(op, box) + + def transform_arguments(self, state, args): + self.before_argument_transform(args) + # Transforming one argument to a vector box argument + # The following cases can occur: + # 1) argument is present in the box_to_vbox map. + # a) vector can be reused immediatly (simple case) + # b) vector is to big + # c) vector is to small + # 2) argument is not known to reside in a vector + # a) expand vars/consts before the label and add as argument + # b) expand vars created in the loop body + # + for i,arg in enumerate(args): + if arg.returns_vector(): + continue + if not self.transform_arg_at(i): + continue + box_pos, vbox = state.getvector_of_box(arg) + if not vbox: + # 2) constant/variable expand this box + vbox = self.expand(arg, i) + self.sched_data.setvector_of_box(arg, 0, vbox) + box_pos = 0 + # convert size i64 -> i32, i32 -> i64, ... + if self.input_type.getsize() > 0 and \ + self.input_type.getsize() != vbox.getsize(): + vbox = self.extend(vbox, self.input_type) + + # use the input as an indicator for the pack type + packable = self.input_type.getcount() + packed = vbox.getcount() + assert packed >= 0 + assert packable >= 0 + if packed > packable: + # the argument has more items than the operation is able to process! + # box_pos == 0 then it is already at the right place + if box_pos != 0: + args[i] = self.unpack(vbox, box_pos, packed - box_pos, self.input_type) + self.update_arg_in_vector_pos(i, args[i]) + #self.update_input_output(self.pack) + continue + else: + assert vbox is not None + args[i] = vbox + continue + vboxes = self.vector_boxes_for_args(i) + if packed < packable and len(vboxes) > 1: + # the argument is scattered along different vector boxes + args[i] = self.gather(vboxes, packable) + self.update_arg_in_vector_pos(i, args[i]) + continue + if box_pos != 0: + # The vector box is at a position != 0 but it + # is required to be at position 0. Unpack it! + args[i] = self.unpack(vbox, box_pos, packed - box_pos, self.input_type) + self.update_arg_in_vector_pos(i, args[i]) + continue + #self.update_input_output(self.pack) + # + assert vbox is not None + args[i] = vbox def before_argument_transform(self, args): pass @@ -319,25 +417,6 @@ # see assembler for comment why raise NotAProfitableLoop - - def transform_pack(self): - """ High level transformation routine of a pack to operations """ - op = self.pack.leftmost() - args = op.getarglist() - self.before_argument_transform(args) - self.transform_arguments(args) - # - vop = ResOperation(op.vector, args, op.getdescr()) - #result = self.transform_result(op) - # - if op.is_guard(): - assert isinstance(op, GuardResOp) - assert isinstance(vop, GuardResOp) - vop.setfailargs(op.getfailargs()) - vop.rd_snapshot = op.rd_snapshot - self.vecops.append(vop) - self.costmodel.record_pack_savings(self.pack, self.pack.opcount()) - def transform_result(self, result): if result is None: return None @@ -571,10 +650,10 @@ variables.append(vbox) return vbox - def is_vector_arg(self, i): - if i < 0 or i >= len(self.arg_ptypes): + def transform_arg_at(self, i): + if i < 0 or i >= len(self.args): return False - return self.arg_ptypes[i] is not None + return self.args[i] is not None def get_output_type_given(self, input_type, op): return input_type @@ -590,9 +669,10 @@ class OpToVectorOpConv(OpToVectorOp): def __init__(self, intype, outtype): - self.from_size = intype.getsize() - self.to_size = outtype.getsize() - OpToVectorOp.__init__(self, (intype, ), outtype) + #self.from_size = intype.getsize() + #self.to_size = outtype.getsize() + #OpToVectorOp.__init__(self, (intype, ), outtype) + pass def new_result_vector_box(self): type = self.output_type.gettype() @@ -650,14 +730,14 @@ class LoadToVectorLoad(OpToVectorOp): def __init__(self): - OpToVectorOp.__init__(self, (), PT_GENERIC) + OpToVectorOp.__init__(self, (), TypeRestrict()) def before_argument_transform(self, args): count = min(self.output_type.getcount(), len(self.getoperations())) args.append(ConstInt(count)) def get_output_type_given(self, input_type, op): - return PackType.by_descr(op.getdescr(), self.sched_data.vec_reg_size) + return Type.by_descr(op.getdescr(), self.sched_data.vec_reg_size) def get_input_type_given(self, output_type, op): return None @@ -668,7 +748,7 @@ Thus a modified split_pack function. """ def __init__(self): - OpToVectorOp.__init__(self, (None, None, PT_GENERIC), None) + OpToVectorOp.__init__(self, (None, None, TypeRestrict()), None) self.has_descr = True def must_be_full_but_is_not(self, pack): @@ -680,7 +760,7 @@ return None def get_input_type_given(self, output_type, op): - return PackType.by_descr(op.getdescr(), self.sched_data.vec_reg_size) + return Type.by_descr(op.getdescr(), self.sched_data.vec_reg_size) class PassThroughOp(OpToVectorOp): """ This pass through is only applicable if the target @@ -696,55 +776,68 @@ def get_input_type_given(self, output_type, op): raise AssertionError("cannot infer input type from output type") -GUARD_TF = PassThroughOp((PT_INT_GENERIC,)) -INT_OP_TO_VOP = OpToVectorOp((PT_INT_GENERIC, PT_INT_GENERIC), INT_RES) -FLOAT_OP_TO_VOP = OpToVectorOp((PT_FLOAT_GENERIC, PT_FLOAT_GENERIC), FLOAT_RES) -FLOAT_SINGLE_ARG_OP_TO_VOP = OpToVectorOp((PT_FLOAT_GENERIC,), FLOAT_RES) -LOAD_TRANS = LoadToVectorLoad() -STORE_TRANS = StoreToVectorStore() -# note that the following definition is x86 arch specific -ROP_ARG_RES_VECTOR = { - rop.VEC_INT_ADD: INT_OP_TO_VOP, - rop.VEC_INT_SUB: INT_OP_TO_VOP, - rop.VEC_INT_MUL: INT_OP_TO_VOP, - rop.VEC_INT_AND: INT_OP_TO_VOP, - rop.VEC_INT_OR: INT_OP_TO_VOP, - rop.VEC_INT_XOR: INT_OP_TO_VOP, +class trans(object): + PASS = PassFirstArg() - rop.VEC_INT_EQ: INT_OP_TO_VOP, - rop.VEC_INT_NE: INT_OP_TO_VOP, + TR_ANY_FLOAT = TypeRestrict(FLOAT) + TR_ANY_INTEGER = TypeRestrict(INT) + TR_FLOAT_2 = TypeRestrict(FLOAT, 4, 2) + TR_DOUBLE_2 = TypeRestrict(FLOAT, 8, 2) + TR_LONG = TypeRestrict(INT, 8, 2) + TR_INT_2 = TypeRestrict(INT, 4, 2) - rop.VEC_INT_SIGNEXT: SignExtToVectorOp((PT_INT_GENERIC,), INT_RES), + INT = OpToVectorOp((TR_ANY_INTEGER, TR_ANY_INTEGER), PASS) + FLOAT = OpToVectorOp((TR_ANY_FLOAT, TR_ANY_FLOAT), PASS) + FLOAT_UNARY = OpToVectorOp((TR_ANY_FLOAT,), PASS) + LOAD = LoadToVectorLoad() + STORE = StoreToVectorStore() + GUARD = PassThroughOp((TR_ANY_INTEGER,)) - rop.VEC_FLOAT_ADD: FLOAT_OP_TO_VOP, - rop.VEC_FLOAT_SUB: FLOAT_OP_TO_VOP, - rop.VEC_FLOAT_MUL: FLOAT_OP_TO_VOP, - rop.VEC_FLOAT_TRUEDIV: FLOAT_OP_TO_VOP, - rop.VEC_FLOAT_ABS: FLOAT_SINGLE_ARG_OP_TO_VOP, - rop.VEC_FLOAT_NEG: FLOAT_SINGLE_ARG_OP_TO_VOP, - rop.VEC_FLOAT_EQ: OpToVectorOp((PT_FLOAT_GENERIC,PT_FLOAT_GENERIC), INT_RES), - rop.VEC_FLOAT_NE: OpToVectorOp((PT_FLOAT_GENERIC,PT_FLOAT_GENERIC), INT_RES), - rop.VEC_INT_IS_TRUE: OpToVectorOp((PT_INT_GENERIC,PT_INT_GENERIC), PT_INT_GENERIC), + # note that the following definition is x86 arch specific + MAPPING = { + rop.VEC_INT_ADD: INT, + rop.VEC_INT_SUB: INT, + rop.VEC_INT_MUL: INT, + rop.VEC_INT_AND: INT, + rop.VEC_INT_OR: INT, + rop.VEC_INT_XOR: INT, + rop.VEC_INT_EQ: INT, + rop.VEC_INT_NE: INT, - rop.VEC_RAW_LOAD_I: LOAD_TRANS, - rop.VEC_RAW_LOAD_F: LOAD_TRANS, - rop.VEC_GETARRAYITEM_RAW_I: LOAD_TRANS, - rop.VEC_GETARRAYITEM_RAW_F: LOAD_TRANS, - rop.VEC_GETARRAYITEM_GC_I: LOAD_TRANS, - rop.VEC_GETARRAYITEM_GC_F: LOAD_TRANS, - rop.VEC_RAW_STORE: STORE_TRANS, - rop.VEC_SETARRAYITEM_RAW: STORE_TRANS, - rop.VEC_SETARRAYITEM_GC: STORE_TRANS, + rop.VEC_FLOAT_ADD: FLOAT, + rop.VEC_FLOAT_SUB: FLOAT, + rop.VEC_FLOAT_MUL: FLOAT, + rop.VEC_FLOAT_TRUEDIV: FLOAT, + rop.VEC_FLOAT_ABS: FLOAT_UNARY, + rop.VEC_FLOAT_NEG: FLOAT_UNARY, - rop.VEC_CAST_FLOAT_TO_SINGLEFLOAT: OpToVectorOpConv(PT_DOUBLE_2, PT_FLOAT_2), - rop.VEC_CAST_SINGLEFLOAT_TO_FLOAT: OpToVectorOpConv(PT_FLOAT_2, PT_DOUBLE_2), - rop.VEC_CAST_FLOAT_TO_INT: OpToVectorOpConv(PT_DOUBLE_2, PT_INT32_2), - rop.VEC_CAST_INT_TO_FLOAT: OpToVectorOpConv(PT_INT32_2, PT_DOUBLE_2), + rop.VEC_RAW_LOAD_I: LOAD, + rop.VEC_RAW_LOAD_F: LOAD, + rop.VEC_GETARRAYITEM_RAW_I: LOAD, + rop.VEC_GETARRAYITEM_RAW_F: LOAD, + rop.VEC_GETARRAYITEM_GC_I: LOAD, + rop.VEC_GETARRAYITEM_GC_F: LOAD, - rop.GUARD_TRUE: GUARD_TF, - rop.GUARD_FALSE: GUARD_TF, -} + rop.VEC_RAW_STORE: STORE, + rop.VEC_SETARRAYITEM_RAW: STORE, + rop.VEC_SETARRAYITEM_GC: STORE, + + rop.GUARD_TRUE: GUARD, + rop.GUARD_FALSE: GUARD, + + # irregular + rop.VEC_INT_SIGNEXT: SignExtToVectorOp((TR_ANY_INTEGER,), None), + + rop.VEC_CAST_FLOAT_TO_SINGLEFLOAT: OpToVectorOpConv(TR_DOUBLE_2, None), #RESTRICT_2_FLOAT), + rop.VEC_CAST_SINGLEFLOAT_TO_FLOAT: OpToVectorOpConv(TR_FLOAT_2, None), #RESTRICT_2_DOUBLE), + rop.VEC_CAST_FLOAT_TO_INT: OpToVectorOpConv(TR_DOUBLE_2, None), #RESTRICT_2_INT), + rop.VEC_CAST_INT_TO_FLOAT: OpToVectorOpConv(TR_INT_2, None), #RESTRICT_2_DOUBLE), + + rop.VEC_FLOAT_EQ: OpToVectorOp((TR_ANY_FLOAT,TR_ANY_FLOAT), None), + rop.VEC_FLOAT_NE: OpToVectorOp((TR_ANY_FLOAT,TR_ANY_FLOAT), None), + rop.VEC_INT_IS_TRUE: OpToVectorOp((TR_ANY_INTEGER,TR_ANY_INTEGER), None), # TR_ANY_INTEGER), + } def determine_input_output_types(pack, node, forward): """ This function is two fold. If moving forward, it @@ -772,7 +865,7 @@ return input_type, output_type def determine_trans(op): - op2vecop = ROP_ARG_RES_VECTOR.get(op.vector, None) + op2vecop = trans.MAPPING.get(op.vector, None) if op2vecop is None: raise NotImplementedError("missing vecop for '%s'" % (op.getopname(),)) return op2vecop @@ -794,28 +887,27 @@ self.seen = {} def post_schedule(self): - pass - # TODO label rename - if vector: - # XXX - # add accumulation info to the descriptor - #for version in self.loop.versions: - # # this needs to be done for renamed (accum arguments) - # version.renamed_inputargs = [ renamer.rename_map.get(arg,arg) for arg in version.inputargs ] - #self.appended_arg_count = len(sched_data.invariant_vector_vars) - ##for guard_node in graph.guards: - ## op = guard_node.getoperation() - ## failargs = op.getfailargs() - ## for i,arg in enumerate(failargs): - ## if arg is None: - ## continue - ## accum = arg.getaccum() - ## if accum: - ## pass - ## #accum.save_to_descr(op.getdescr(),i) - #self.has_two_labels = len(sched_data.invariant_oplist) > 0 - #self.loop.operations = self.prepend_invariant_operations(sched_data) - pass + loop = self.graph.loop + self.sched_data.unpack_from_vector(loop.jump.getoperation(), self) + SchedulerState.post_schedule(self) + + # add accumulation info to the descriptor + #for version in self.loop.versions: + # # this needs to be done for renamed (accum arguments) + # version.renamed_inputargs = [ renamer.rename_map.get(arg,arg) for arg in version.inputargs ] + #self.appended_arg_count = len(sched_data.invariant_vector_vars) + ##for guard_node in graph.guards: + ## op = guard_node.getoperation() + ## failargs = op.getfailargs() + ## for i,arg in enumerate(failargs): + ## if arg is None: + ## continue + ## accum = arg.getaccum() + ## if accum: + ## pass + ## #accum.save_to_descr(op.getdescr(),i) + #self.has_two_labels = len(sched_data.invariant_oplist) > 0 + #self.loop.operations = self.prepend_invariant_operations(sched_data) def profitable(self): @@ -823,7 +915,10 @@ def prepare(self): SchedulerState.prepare(self) - self.graph.prepare_for_scheduling() + for node in self.graph.nodes: + if node.depends_count() == 0: + self.worklist.insert(0, node) + self.packset.accumulate_prepare(self) for arg in self.graph.loop.label.getarglist(): self.seen[arg] = None @@ -834,32 +929,26 @@ """ if node.pack: for node in node.pack.operations: - scheduler.scheduled(node) - self.as_vector_operation(node.pack) + scheduler.mark_emitted(node, self) + assert node.pack.opcount() > 1 + op2vecop = determine_trans(node.pack.leftmost()) + op2vecop.as_vector_operation(self, node.pack) return True return False - - def as_vector_operation(self, pack): - """ Transform a pack into a single or several operation. - Calls the as_vector_operation of the OpToVectorOp implementation. - """ - assert pack.opcount() > 1 - # properties that hold for the pack are: - # + isomorphism (see func) - # + tightly packed (no room between vector elems) - - position = len(self.oplist) - op = pack.leftmost().getoperation() - determine_trans(op).as_vector_operation(pack, self, self.oplist) - # - if pack.is_accumulating(): - box = oplist[position].result - assert box is not None - for node in pack.operations: - op = node.getoperation() - assert not op.returns_void() - scheduler.renamer.start_renaming(op, box) + def delay(self, node): + if node.pack: + pack = node.pack + if pack.is_accumulating(): + for node in pack.operations: + for dep in node.depends(): + if dep.to.pack is not pack: + return True + else: + for node in pack.operations: + if node.depends_count() > 0: + return True + return False def unpack_from_vector(self, op, scheduler): """ If a box is needed that is currently stored within a vector diff --git a/rpython/jit/metainterp/optimizeopt/test/test_schedule.py b/rpython/jit/metainterp/optimizeopt/test/test_schedule.py --- a/rpython/jit/metainterp/optimizeopt/test/test_schedule.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_schedule.py @@ -7,7 +7,7 @@ Pack, Pair, NotAProfitableLoop, VectorizingOptimizer, X86_CostModel, PackSet) from rpython.jit.metainterp.optimizeopt.dependency import Node, DependencyGraph -from rpython.jit.metainterp.optimizeopt.schedule import PackType, Scheduler +from rpython.jit.metainterp.optimizeopt.schedule import Type, Scheduler from rpython.jit.metainterp.optimizeopt.test.test_util import LLtypeMixin from rpython.jit.metainterp.optimizeopt.test.test_dependency import (DependencyBaseTest, FakeDependencyGraph) @@ -17,13 +17,13 @@ from rpython.jit.tool.oparser import parse as opparse from rpython.jit.tool.oparser_model import get_model -F64 = PackType('f',8,False,2) -F32 = PackType('f',4,False,4) -F32_2 = PackType('f',4,False,2) -I64 = PackType('i',8,True,2) -I32 = PackType('i',4,True,4) -I32_2 = PackType('i',4,True,2) -I16 = PackType('i',2,True,8) +F64 = Type('f',8,False) +F32 = Type('f',4,False) +F32_2 = Type('f',4,False) +I64 = Type('i',8,True) +I32 = Type('i',4,True) +I32_2 = Type('i',4,True) +I16 = Type('i',2,True) class FakePackSet(PackSet): def __init__(self, packs): @@ -77,7 +77,6 @@ pairs = [] for pack in packs: for i in range(len(pack.operations)-1): - pack.clear() o1 = pack.operations[i] o2 = pack.operations[i+1] pair = Pair(o1,o2,pack.input_type,pack.output_type) @@ -100,10 +99,10 @@ def test_next_must_not_loop_forever(self): scheduler = Scheduler() - def schedulable(node): + def delay(node, state): node.count += 1 - return False - scheduler.schedulable = schedulable + return True + scheduler.delay = delay class State(object): pass class Node(object): emitted = False; pack = None; count = 0 state = State() @@ -269,7 +268,7 @@ """) pack1 = self.pack(loop1, 0, 8, None, F64) pack2 = self.pack(loop1, 8, 16, F64, I32_2) - I16_2 = PackType('i',2,True,2) + I16_2 = Type('i',2,True) pack3 = self.pack(loop1, 16, 24, I32_2, I16_2) pack4 = self.pack(loop1, 24, 32, I16, None) def void(b,c): diff --git a/rpython/jit/metainterp/optimizeopt/vector.py b/rpython/jit/metainterp/optimizeopt/vector.py --- a/rpython/jit/metainterp/optimizeopt/vector.py +++ b/rpython/jit/metainterp/optimizeopt/vector.py @@ -22,7 +22,7 @@ from rpython.jit.metainterp.optimizeopt.version import LoopVersionInfo from rpython.jit.metainterp.optimizeopt.schedule import (VecScheduleState, Scheduler, Pack, Pair, AccumPair, vectorbox_outof_box, getpackopnum, - getunpackopnum, PackType, determine_input_output_types) + getunpackopnum, Type, determine_input_output_types) from rpython.jit.metainterp.optimizeopt.guard import GuardStrengthenOpt from rpython.jit.metainterp.resoperation import (rop, ResOperation, GuardResOp, Accum) from rpython.rlib import listsort @@ -449,21 +449,12 @@ if fail: assert False - def schedule(self, state): # TODO vector=False, sched_data=None): - """ Scheduling the trace and emitting vector operations - for packed instructions. - """ + def schedule(self, state): state.prepare() scheduler = Scheduler() scheduler.walk_and_emit(state) - # - if not we_are_translated(): - for node in graph.nodes: - assert node.emitted - # if state.profitable(): return - # state.post_schedule() def prepend_invariant_operations(self, sched_data): @@ -681,14 +672,13 @@ return None # if origin_pack is None: - descr = lnode.getoperation().getdescr() - ptype = PackType.by_descr(descr, self.vec_reg_size) - if lnode.getoperation().is_primitive_load(): + op = lnode.getoperation() + if op.is_primitive_load(): # load outputs value, no input - return Pair(lnode, rnode, None, ptype) + return Pair(lnode, rnode, None, Type.of(op)) else: # store only has an input - return Pair(lnode, rnode, ptype, None) + return Pair(lnode, rnode, Type.of(op), None) if self.profitable_pack(lnode, rnode, origin_pack, forward): input_type, output_type = \ determine_input_output_types(origin_pack, lnode, forward) diff --git a/rpython/jit/metainterp/resoperation.py b/rpython/jit/metainterp/resoperation.py --- a/rpython/jit/metainterp/resoperation.py +++ b/rpython/jit/metainterp/resoperation.py @@ -82,6 +82,11 @@ op.setdescr(descr) return op +def VecOperation(opnum, args, type, count, descr=None): + op = ResOperation(opnum, args, descr) + op.item_type = type + op.item_count = count + return op class AbstractResOpOrInputArg(AbstractValue): _attrs_ = ('_forwarded',) @@ -90,8 +95,6 @@ def get_forwarded(self): return self._forwarded - - class AbstractResOp(AbstractResOpOrInputArg): """The central ResOperation class, representing one operation.""" @@ -555,8 +558,8 @@ class VectorOp(object): _mixin_ = True - _attrs_ = ('item_type','item_count','item_size','item_signed','accum') - _extended_display = False + #_attrs_ = ('item_type','item_count','item_size','item_signed','accum') + _attrs_ = ('item_type', 'item_count') #def __init__(self, item_type=FLOAT, item_count=2, item_size=8, item_signed=False, accum=None): # assert item_type in (FLOAT, INT) @@ -567,13 +570,10 @@ # self.accum = None def gettype(self): - return self.item_type + return self.type - def getsize(self): - return self.item_size - - def getsigned(self): - return self.item_signed + def getbytes(self): + return self.slot_bytes def getcount(self): return self.item_count _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit