Author: Richard Plangger <planri...@gmail.com> Branch: vecopt-merge Changeset: r79649:9d05ed8b7873 Date: 2015-09-15 17:17 +0200 http://bitbucket.org/pypy/pypy/changeset/9d05ed8b7873/
Log: fighting with casting. not quite there, but soon scheduling tests should work diff --git a/rpython/jit/metainterp/optimizeopt/renamer.py b/rpython/jit/metainterp/optimizeopt/renamer.py --- a/rpython/jit/metainterp/optimizeopt/renamer.py +++ b/rpython/jit/metainterp/optimizeopt/renamer.py @@ -1,3 +1,5 @@ +from rpython.jit.metainterp import resoperation +from rpython.jit.metainterp.resume import Snapshot class Renamer(object): def __init__(self): diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py b/rpython/jit/metainterp/optimizeopt/schedule.py --- a/rpython/jit/metainterp/optimizeopt/schedule.py +++ b/rpython/jit/metainterp/optimizeopt/schedule.py @@ -15,11 +15,20 @@ self.graph = graph self.oplist = [] self.worklist = [] + self.invariant_oplist = [] + self.invariant_vector_vars = [] def post_schedule(self): loop = self.graph.loop self.renamer.rename(loop.jump) loop.operations = self.oplist + loop.prefix = self.invariant_oplist + if len(self.invariant_vector_vars) > 0: + # TODO, accum? + args = loop.label.getarglist_copy() + self.invariant_vector_vars + opnum = loop.label.getopnum() + # TODO descr? + loop.prefix_label = loop.label.copy_and_change(opnum, args) def profitable(self): return self.costmodel.profitable() @@ -67,7 +76,7 @@ Keeps worklist sorted (see priority) """ op = node.getoperation() state.renamer.rename(op) - state.unpack_from_vector(op) + state.ensure_args_unpacked(op) node.position = len(state.oplist) worklist = state.worklist for dep in node.provides()[:]: # COPY @@ -105,6 +114,7 @@ if not node.emitted: op = node.getoperation() self.mark_emitted(node, state) + state.seen[op] = None state.oplist.append(op) continue @@ -120,52 +130,6 @@ for node in state.graph.nodes: assert node.emitted -def vectorbox_outof_box(box, count=-1, size=-1, type='-'): - if box.type not in (FLOAT, INT): - raise AssertionError("cannot create vector box of type %s" % (box.type)) - signed = True - if box.type == FLOAT: - signed = False - return BoxVector(box.type, 2, 8, signed) - -def packtype_outof_box(box): - if box.type == VECTOR: - return Type.of(box) - else: - if box.type == INT: - return Type(INT, 8, True, 2) - elif box.type == FLOAT: - return Type(FLOAT, 8, False, 2) - # - raise AssertionError("box %s not supported" % (box,)) - -def vectorbox_clone_set(box, count=-1, size=-1, type='-', clone_signed=True, signed=False): - if count == -1: - count = box.getcount() - if size == -1: - size = box.getsize() - if type == '-': - type = box.gettype() - if clone_signed: - signed = box.getsigned() - return BoxVector(type, count, size, signed) - -def getpackopnum(type): - if type == INT: - return rop.VEC_INT_PACK - elif type == FLOAT: - return rop.VEC_FLOAT_PACK - # - raise AssertionError("getpackopnum type %s not supported" % (type,)) - -def getunpackopnum(type): - if type == INT: - return rop.VEC_INT_UNPACK - elif type == FLOAT: - return rop.VEC_FLOAT_UNPACK - # - raise AssertionError("getunpackopnum type %s not supported" % (type,)) - #UNSIGNED_OPS = (rop.UINT_FLOORDIV, rop.UINT_RSHIFT, # rop.UINT_LT, rop.UINT_LE, # rop.UINT_GT, rop.UINT_GE) @@ -275,27 +239,6 @@ def bytecount(self): return self.count * self.type.bytecount() -class DataTyper(object): - - def infer_type(self, op): - # default action, pass through: find the first arg - # the output is the same as the first argument! - if op.returns_void() or op.argcount() == 0: - return - arg0 = op.getarg(0) - op.setdatatype(arg0.datatype, arg0.bytesize, arg0.signed) - -class PassFirstArg(TypeOutput): - def __init__(self): - pass - -def update_arg_in_vector_pos(state, argidx, box): - arguments = [op.getoperation().getarg(argidx) for op in self.getoperations()] - for i,arg in enumerate(arguments): - #if i >= box.count: - # break - state.setvector_of_box(arg, i, box) - class TypeRestrict(object): ANY_TYPE = -1 ANY_SIZE = -1 @@ -320,7 +263,6 @@ return True class trans(object): - #DT_PASS = DataTyper() TR_ANY = TypeRestrict() TR_ANY_FLOAT = TypeRestrict(FLOAT) @@ -355,16 +297,9 @@ rop.VEC_FLOAT_ABS: [TR_ANY_FLOAT], rop.VEC_FLOAT_NEG: [TR_ANY_FLOAT], - rop.VEC_RAW_LOAD_I: [None, None, TR_ANY], - rop.VEC_RAW_LOAD_F: [None, None, TR_ANY], - rop.VEC_GETARRAYITEM_RAW_I: [None, None, TR_ANY], - rop.VEC_GETARRAYITEM_RAW_F: [None, None, TR_ANY], - rop.VEC_GETARRAYITEM_GC_I: [None, None, TR_ANY], - rop.VEC_GETARRAYITEM_GC_F: [None, None, TR_ANY], - - rop.VEC_RAW_STORE: [None, None, None, TR_ANY], - rop.VEC_SETARRAYITEM_RAW: [None, None, None, TR_ANY], - rop.VEC_SETARRAYITEM_GC: [None, None, None, TR_ANY], + rop.VEC_RAW_STORE: [None, None, TR_ANY], + rop.VEC_SETARRAYITEM_RAW: [None, None, TR_ANY], + rop.VEC_SETARRAYITEM_GC: [None, None, TR_ANY], rop.GUARD_TRUE: [TR_ANY_INTEGER], rop.GUARD_FALSE: [TR_ANY_INTEGER], @@ -427,12 +362,13 @@ # a) expand vars/consts before the label and add as argument # b) expand vars created in the loop body # - restrictions = trans.MAPPING[pack.leftmost().vector] + restrictions = trans.MAPPING.get(pack.leftmost().vector, []) + if not restrictions: + return for i,arg in enumerate(args): if i >= len(restrictions) or restrictions[i] is None: # ignore this argument continue - print "trans", i, "arg", arg if arg.returns_vector(): continue pos, vecop = state.getvector_of_box(arg) @@ -442,40 +378,32 @@ continue args[i] = vecop assemble_scattered_values(state, pack, args, i) - position_values(state, pack, args, i, arg, pos) + position_values(state, pack, args, i, pos) def assemble_scattered_values(state, pack, args, index): vectors = pack.argument_vectors(state, pack, index) if len(vectors) > 1: # the argument is scattered along different vector boxes - value = gather(vectors, packable) - update_arg_in_vector_pos(state, i, value) - args[i] = value - #if packed < packable and len(vboxes) > 1: - # # the argument is scattered along different vector boxes - # args[i] = self.gather(vboxes, packable) - # self.update_arg_in_vector_pos(i, args[i]) - # continue + args[index] = gather(state, vectors, pack.numops()) + state.remember_args_in_vector(pack, index, args[index]) -def gather(self, vboxes, target_count): # packed < packable and packed < stride: - (_, box) = vboxes[0] +def gather(state, vectors, count): # packed < packable and packed < stride: + (_, arg) = vectors[0] i = 1 - while i < len(vboxes): - (box2_pos, box2) = vboxes[i] - if box.getcount() + box2.getcount() <= target_count: - box = self.package(box, box.getcount(), - box2, box2_pos, box2.getcount()) + while i < len(vectors): + (newarg_pos, newarg) = vectors[i] + if arg.count + newarg.count <= count: + arg = pack_into_vector(state, arg, arg.count, newarg, newarg_pos, newarg.count) i += 1 - return box + return arg -def position_values(state, pack, args, index, arg, pos): - pass - #if pos != 0: - # # The vector box is at a position != 0 but it - # # is required to be at position 0. Unpack it! - # args[i] = self.unpack(vecop, pos, packed - pos, self.input_type) - # self.update_arg_in_vector_pos(i, args[i]) - # continue +def position_values(state, pack, args, index, position): + if position != 0: + # The vector box is at a position != 0 but it + # is required to be at position 0. Unpack it! + arg = args[index] + args[index] = unpack_from_vector(state, arg, position, arg.count - position) + state.remember_args_in_vector(pack, index, args[index]) # convert size i64 -> i32, i32 -> i64, ... # TODO if self.bytesize > 0: @@ -493,7 +421,7 @@ # # pos == 0 then it is already at the right place # if pos != 0: # args[i] = self.unpack(vecop, pos, packed - pos, self.input_type) - # self.update_arg_in_vector_pos(i, args[i]) + # state.remember_args_in_vector(i, args[i]) # #self.update_input_output(self.pack) # continue # else: @@ -504,13 +432,13 @@ #if packed < packable and len(vboxes) > 1: # # the argument is scattered along different vector boxes # args[i] = self.gather(vboxes, packable) - # self.update_arg_in_vector_pos(i, args[i]) + # state.remember_args_in_vector(i, args[i]) # continue #if pos != 0: # # The vector box is at a position != 0 but it # # is required to be at position 0. Unpack it! # args[i] = self.unpack(vecop, pos, packed - pos, self.input_type) - # self.update_arg_in_vector_pos(i, args[i]) + # state.remember_args_in_vector(i, args[i]) # continue ## #assert vecop is not None @@ -551,53 +479,50 @@ self.vecops.append(op) return vbox_cloned -def unpack(self, vbox, index, count, arg_ptype): +def unpack_from_vector(state, arg, index, count): """ Extract parts of the vector box into another vector box """ - assert index < vbox.getcount() - assert index + count <= vbox.getcount() + print "unpack i", index, "c", count, "v", arg assert count > 0 - vbox_cloned = vectorbox_clone_set(vbox, count=count) - opnum = getunpackopnum(vbox.gettype()) - op = ResOperation(opnum, [vbox, ConstInt(index), ConstInt(count)], vbox_cloned) - self.costmodel.record_vector_unpack(vbox, index, count) - self.vecops.append(op) - # - return vbox_cloned + assert index + count <= arg.count + args = [arg, ConstInt(index), ConstInt(count)] + vecop = OpHelpers.create_vec_unpack(arg.type, args, arg.bytesize, + arg.signed, count) + state.costmodel.record_vector_unpack(arg, index, count) + state.oplist.append(vecop) + return vecop -def package(self, tgt, tidx, src, sidx, scount): +def pack_into_vector(state, tgt, tidx, src, sidx, scount): """ tgt = [1,2,3,4,_,_,_,_] src = [5,6,_,_] new_box = [1,2,3,4,5,6,_,_] after the operation, tidx=4, scount=2 """ assert sidx == 0 # restriction - count = tgt.getcount() + src.getcount() - new_box = vectorbox_clone_set(tgt, count=count) - opnum = getpackopnum(tgt.gettype()) - op = ResOperation(opnum, [tgt, src, ConstInt(tidx), ConstInt(scount)], new_box) - self.vecops.append(op) - self.costmodel.record_vector_pack(src, sidx, scount) + newcount = tgt.count + scount + args = [tgt, src, ConstInt(tidx), ConstInt(scount)] + vecop = OpHelpers.create_vec_pack(tgt.type, args, tgt.bytesize, tgt.signed, newcount) + state.oplist.append(vecop) + state.costmodel.record_vector_pack(src, sidx, scount) if not we_are_translated(): - self._check_vec_pack(op) - return new_box + _check_vec_pack(vecop) + return vecop -def _check_vec_pack(self, op): - result = op +def _check_vec_pack(op): arg0 = op.getarg(0) arg1 = op.getarg(1) index = op.getarg(2) count = op.getarg(3) - assert isinstance(result, BoxVector) - assert isinstance(arg0, BoxVector) - assert isinstance(index, ConstInt) + assert op.is_vector() + assert arg0.is_vector() + assert index.is_constant() assert isinstance(count, ConstInt) - assert arg0.getsize() == result.getsize() - if isinstance(arg1, BoxVector): - assert arg1.getsize() == result.getsize() + assert arg0.bytesize == op.bytesize + if arg1.is_vector(): + assert arg1.bytesize == op.bytesize else: assert count.value == 1 - assert index.value < result.getcount() - assert index.value + count.value <= result.getcount() - assert result.getcount() > arg0.getcount() + assert index.value < op.count + assert index.value + count.value <= op.count + assert op.count > arg0.count def expand(state, pack, args, arg, index): """ Expand a value into a vector box. useful for arith metic @@ -610,7 +535,8 @@ ops = state.invariant_oplist variables = state.invariant_vector_vars if not arg.is_constant() and arg not in state.inputargs: - ops = self.vecops + # cannot be created before the loop, expand inline + ops = state.oplist variables = None for i, node in enumerate(pack.operations): @@ -620,29 +546,30 @@ i += 1 else: # note that heterogenous nodes are not yet tracked - already_expanded = expanded_map.get(arg, None) - if already_expanded: - return already_expanded + vecop = expanded_map.get(arg, None) + if vecop: + args[index] = vecop + return vecop vecop = OpHelpers.create_vec_expand(arg, op.bytesize, op.signed, pack.numops()) - state.oplist.append(vecop) + ops.append(vecop) if variables is not None: variables.append(vecop) expanded_map[arg] = vecop - for i in range(vecop.count): - state.setvector_of_box(arg, i, vecop) + #for i in range(vecop.count): + # state.setvector_of_box(arg, i, vecop) args[index] = vecop return vecop vecop = OpHelpers.create_vec(arg.type, left.bytesize, left.signed) - state.oplist.append(vecop) + ops.append(vecop) for i,node in enumerate(pack.operations): op = node.getoperation() arg = op.getarg(index) arguments = [vecop, arg, ConstInt(i), ConstInt(1)] vecop = OpHelpers.create_vec_pack(arg.type, arguments, left.bytesize, left.signed, vecop.count+1) - state.setvector_of_box(arg, i, vecop) - state.oplist.append(vecop) + #state.setvector_of_box(arg, i, vecop) + ops.append(vecop) if variables is not None: variables.append(vecop) @@ -654,8 +581,6 @@ self.box_to_vbox = {} self.cpu = cpu self.vec_reg_size = cpu.vector_register_size - self.invariant_oplist = [] - self.invariant_vector_vars = [] self.expanded_map = {} self.costmodel = costmodel self.inputargs = {} @@ -666,7 +591,7 @@ def post_schedule(self): loop = self.graph.loop - self.unpack_from_vector(loop.jump) + self.ensure_args_unpacked(loop.jump) SchedulerState.post_schedule(self) # add accumulation info to the descriptor @@ -727,47 +652,41 @@ return True return False - def unpack_from_vector(self, op): + def ensure_args_unpacked(self, op): """ If a box is needed that is currently stored within a vector box, this utility creates a unpacking instruction. """ - args = op.getarglist() - # unpack for an immediate use - for i, arg in enumerate(op.getarglist()): - if not arg.is_constant(): - argument = self._unpack_from_vector(i, arg) - if arg is not argument: - op.setarg(i, argument) - if not op.returns_void(): - self.seen[op] = None + for i, argument in enumerate(op.getarglist()): + if not argument.is_constant(): + arg = self.ensure_unpacked(i, argument) + if argument is not arg: + op.setarg(i, arg) # unpack for a guard exit if op.is_guard(): + # could be moved to the guard exit fail_args = op.getfailargs() - for i, arg in enumerate(fail_args): - if arg and not arg.is_constant(): - argument = self._unpack_from_vector(i, arg) - if arg is not argument: - fail_args[i] = argument + for i, argument in enumerate(fail_args): + if argument and not argument.is_constant(): + arg = self.ensure_unpacked(i, argument) + if argument is not arg: + fail_arguments[i] = arg - def _unpack_from_vector(self, i, arg): - if arg in self.seen or arg.type == 'V': + def ensure_unpacked(self, index, arg): + if arg in self.seen or not arg.is_vector(): return arg - (j, vbox) = self.getvector_of_box(arg) - if vbox: - if vbox in self.invariant_vector_vars: + (pos, var) = self.getvector_of_box(arg) + if var: + if var in self.invariant_vector_vars: return arg - arg_cloned = arg.clonebox() - self.seen[arg_cloned] = None - self.renamer.start_renaming(arg, arg_cloned) - self.setvector_of_box(arg_cloned, j, vbox) - cj = ConstInt(j) - ci = ConstInt(1) - opnum = getunpackopnum(vbox.gettype()) - unpack_op = ResOperation(opnum, [vbox, cj, ci], arg_cloned) - self.costmodel.record_vector_unpack(vbox, j, 1) - self.oplist.append(unpack_op) - return arg_cloned + args = [var, ConstInt(pos), ConstInt(1)] + vecop = OpHelpers.create_vec_unpack(var.type, args, var.bytesize, + var.signed, 1) + self.renamer.start_renaming(arg, vecop) + self.seen[vecop] = None + self.costmodel.record_vector_unpack(var, pos, 1) + self.oplist.append(vecop) + return vecop return arg def _prevent_signext(self, outsize, insize): @@ -783,11 +702,24 @@ assert not var.is_vector() self.box_to_vbox[var] = (off, vector) + def remember_args_in_vector(self, pack, index, box): + arguments = [op.getoperation().getarg(index) for op in pack.operations] + for i,arg in enumerate(arguments): + if i >= box.count: + break + self.setvector_of_box(arg, i, box) + + def opcount_filling_vector_register(pack, vec_reg_size): """ how many operations of that kind can one execute with a machine instruction of register size X? """ op = pack.leftmost() + if op.returns_void(): + assert op.is_primitive_store() + arg = op.getarg(2) + return vec_reg_size // arg.bytesize + if op.is_typecast(): if op.casts_down(): return vec_reg_size // op.cast_from_bytesize() @@ -806,13 +738,6 @@ self.operations = ops self.accum = None self.update_pack_of_nodes() - # initializes the type - # TODO - #input_type, output_type = \ - # determine_input_output_types(origin_pack, lnode, forward) - #self.input_type = input_type - #self.output_type = output_type - #assert self.input_type is not None or self.output_type is not None def numops(self): return len(self.operations) @@ -853,6 +778,11 @@ """ left = self.leftmost() if left.returns_void(): + if left.is_primitive_store(): + # make this case more general if it turns out this is + # not the only case where packs need to be trashed + indexarg = left.getarg(2) + return indexarg.bytesize * self.numops() - vec_reg_size return 0 if self.numops() == 0: return -1 @@ -860,7 +790,9 @@ # casting is special, often only takes a half full vector if left.casts_down(): # size is reduced - return left.cast_from_bytesize() * self.numops() - vec_reg_size + size = left.cast_input_bytesize(vec_reg_size) + import pdb; pdb.set_trace() + return left.cast_from_bytesize() * self.numops() - size else: # size is increased return left.cast_to_bytesize() * self.numops() - vec_reg_size @@ -897,7 +829,8 @@ oplist, newoplist = pack.slice_operations(vec_reg_size) pack.operations = oplist pack.update_pack_of_nodes() - assert pack.is_full(vec_reg_size) + if not pack.leftmost().is_typecast(): + assert pack.is_full(vec_reg_size) # newpack = pack.clone(newoplist) load = newpack.pack_load(vec_reg_size) @@ -1195,7 +1128,7 @@ # # box_pos == 0 then it is already at the right place # if box_pos != 0: # args[i] = self.unpack(vbox, box_pos, packed - box_pos, self.input_type) -# self.update_arg_in_vector_pos(i, args[i]) +# remember_args_in_vector(i, args[i]) # #self.update_input_output(self.pack) # continue # else: @@ -1206,13 +1139,13 @@ # if packed < packable and len(vboxes) > 1: # # the argument is scattered along different vector boxes # args[i] = self.gather(vboxes, packable) -# self.update_arg_in_vector_pos(i, args[i]) +# remember_args_in_vector(i, args[i]) # continue # if box_pos != 0: # # The vector box is at a position != 0 but it # # is required to be at position 0. Unpack it! # args[i] = self.unpack(vbox, box_pos, packed - box_pos, self.input_type) -# self.update_arg_in_vector_pos(i, args[i]) +# remember_args_in_vector(i, args[i]) # continue # #self.update_input_output(self.pack) # # diff --git a/rpython/jit/metainterp/optimizeopt/test/test_schedule.py b/rpython/jit/metainterp/optimizeopt/test/test_schedule.py --- a/rpython/jit/metainterp/optimizeopt/test/test_schedule.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_schedule.py @@ -82,13 +82,14 @@ jitdriver_sd = FakeJitDriverStaticData() opt = VectorizingOptimizer(metainterp_sd, jitdriver_sd, 0) opt.packset = packset - if not prepend_invariant: - state.prepend_invariant_operations = lambda list, _: list opt.combine_packset() opt.schedule(state) # works for now. might be the wrong class? # wrap label + operations + jump it in tree loop otherwise - return state.graph.loop + loop = state.graph.loop + if prepend_invariant: + loop.operations = loop.prefix + loop.operations + return loop class Test(SchedulerBaseTest, LLtypeMixin): @@ -358,13 +359,12 @@ """, False) self.assert_equal(loop2, loop3) - def test_split_load_store(self): loop1 = self.parse_trace(""" - i10 = raw_load_f(p0, i1, descr=float) - i11 = raw_load_f(p0, i2, descr=float) - i12 = raw_load_f(p0, i3, descr=float) - i13 = raw_load_f(p0, i4, descr=float) + i10 = raw_load_i(p0, i1, descr=float) + i11 = raw_load_i(p0, i2, descr=float) + i12 = raw_load_i(p0, i3, descr=float) + i13 = raw_load_i(p0, i4, descr=float) raw_store(p0, i3, i10, descr=float) raw_store(p0, i4, i11, descr=float) """) @@ -454,11 +454,12 @@ def test_combine_packset_nearly_empty_pack(self): trace = self.parse_trace(""" - i10 = int_add(i1, i3) - i11 = int_add(i2, i3) + i10 = int_add(i1, i1) + i11 = int_add(i2, i2) + i12 = int_add(i3, i3) """) pack = self.pack(trace, 0, 2) packset = FakePackSet([pack]) packset.split_overloaded_packs() - assert len(packset.packs) == 0 + assert len(packset.packs) == 1 diff --git a/rpython/jit/metainterp/optimizeopt/vector.py b/rpython/jit/metainterp/optimizeopt/vector.py --- a/rpython/jit/metainterp/optimizeopt/vector.py +++ b/rpython/jit/metainterp/optimizeopt/vector.py @@ -21,8 +21,7 @@ MemoryRef, Node, IndexVar) from rpython.jit.metainterp.optimizeopt.version import LoopVersionInfo from rpython.jit.metainterp.optimizeopt.schedule import (VecScheduleState, - Scheduler, Pack, Pair, AccumPair, vectorbox_outof_box, getpackopnum, - getunpackopnum) + Scheduler, Pack, Pair, AccumPair) from rpython.jit.metainterp.optimizeopt.guard import GuardStrengthenOpt from rpython.jit.metainterp.resoperation import (rop, ResOperation, GuardResOp, Accum) from rpython.rlib import listsort @@ -45,6 +44,11 @@ def operation_list(self): return [self.label] + self.operations + [self.jump] + def assemble_oplist(self): + oplist = self.prefix + [self.prefix_label] + \ + loop.operations + [loop.jump] + return oplist + def optimize_vector(metainterp_sd, jitdriver_sd, warmstate, loop_info, loop_ops): """ Enter the world of SIMD. Bails if it cannot transform the trace. """ user_code = not jitdriver_sd.vec and warmstate.vec_all @@ -75,7 +79,7 @@ (opt.unroll_count+1, len(version.operations), len(loop.operations), nano)) debug_stop("vec-opt-loop") # - return info, loop.operations + [loop.jump] + return info, loop.assemble_oplist() except NotAVectorizeableLoop: debug_stop("vec-opt-loop") # vectorization is not possible @@ -625,7 +629,7 @@ self.savings += -count def record_vector_pack(self, src, index, count): - if src.gettype() == FLOAT: + if src.datatype == FLOAT: if index == 1 and count == 1: self.savings -= 2 return @@ -826,6 +830,7 @@ def split_overloaded_packs(self): newpacks = [] + import pdb; pdb.set_trace() for i,pack in enumerate(self.packs): load = pack.pack_load(self.vec_reg_size) if load > Pack.FULL: diff --git a/rpython/jit/metainterp/resoperation.py b/rpython/jit/metainterp/resoperation.py --- a/rpython/jit/metainterp/resoperation.py +++ b/rpython/jit/metainterp/resoperation.py @@ -92,7 +92,13 @@ return op def VecOperation(opnum, args, baseop, count, descr=None): - return VecOperationNew(opnum, args, baseop.datatype, baseop.bytesize, baseop.signed, count, descr) + datatype = baseop.datatype + bytesize = baseop.bytesize + if baseop.is_typecast(): + ft,tt = baseop.cast_types() + datatype = tt + bytesize = baseop.cast_to_bytesize() + return VecOperationNew(opnum, args, datatype, bytesize, baseop.signed, count, descr) def VecOperationNew(opnum, args, datateyp, bytesize, signed, count, descr=None): op = ResOperation(opnum, args, descr) @@ -184,7 +190,7 @@ boolreflex = -1 boolinverse = -1 vector = -1 # -1 means, no vector equivalent, -2 it is a vector statement - casts = ('\x00', -1, '\x00', -1) + casts = ('\x00', -1, '\x00', -1, -1) count = -1 def getopnum(self): @@ -271,7 +277,7 @@ memo[self] = num if self.is_vector(): assert isinstance(self, VectorOp) - sres = 'v%d[%dx%s%d] = ' % (num, self.count, self.datatype, self.bytesize * 8) + sres = self.vector_repr(num) + ' = ' else: sres = self.type + str(num) + ' = ' #if self.result is not None: @@ -302,8 +308,7 @@ memo[self] = num if self.is_vector(): assert isinstance(self, VectorOp) - return 'v%d[%dx%s%d]' % (num, self.count, self.datatype, - self.bytesize * 8) + return self.vector_repr(num) return self.type + str(num) def __repr__(self): @@ -451,14 +456,17 @@ def is_typecast(self): return False + def cast_count(self): + return self.casts[4] + def cast_types(self): return self.casts[0], self.casts[2] def cast_to_bytesize(self): - return self.casts[1] + return self.casts[3] def cast_from_bytesize(self): - return self.casts[3] + return self.casts[1] def casts_up(self): return self.cast_to_bytesize() > self.cast_from_bytesize() @@ -657,6 +665,11 @@ return self.bytesize return (type, size) + def cast_input_bytesize(self, vec_reg_size): + count = vec_reg_size // self.cast_to_bytesize() + size = self.cast_from_bytesize() * self.count + return size + class SignExtOp(object): _mixin_ = True @@ -676,11 +689,18 @@ arg = self.getarg(0) return arg.bytesize + def cast_count(self): + return self.casts[4] + + class VectorOp(object): _mixin_ = True - def repr_rpython(self): - return repr_rpython(self, 'bv') + def vector_repr(self, num): + if self.opnum in (rop.VEC_UNPACK_I, rop.VEC_UNPACK_F): + return self.type + str(num) + return 'v%d[%dx%s%d]' % (num, self.count, self.datatype, + self.bytesize * 8) def vector_bytesize(self): assert self.count > 0 @@ -812,7 +832,6 @@ else: raise IndexError - class BinaryOp(object): _mixin_ = True _arg0 = None @@ -1597,3 +1616,13 @@ opnum = rop.VEC_PACK_F return VecOperationNew(opnum, args, datatype, bytesize, signed, count) + @staticmethod + def create_vec_unpack(datatype, args, bytesize, signed, count): + if datatype == 'i': + opnum = rop.VEC_UNPACK_I + else: + assert datatype == 'f' + opnum = rop.VEC_UNPACK_F + return VecOperationNew(opnum, args, datatype, bytesize, signed, count) + + _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit