[pypy-commit] pypy vecopt-merge: fighting with casting. not quite there, but soon scheduling tests should work

plan_rich Tue, 15 Sep 2015 08:19:10 -0700

Author: Richard Plangger <planri...@gmail.com>
Branch: vecopt-merge
Changeset: r79649:9d05ed8b7873
Date: 2015-09-15 17:17 +0200
http://bitbucket.org/pypy/pypy/changeset/9d05ed8b7873/


Log:    fighting with casting. not quite there, but soon scheduling tests
        should work

diff --git a/rpython/jit/metainterp/optimizeopt/renamer.py b/rpython/jit/metainterp/optimizeopt/renamer.py
--- a/rpython/jit/metainterp/optimizeopt/renamer.py
+++ b/rpython/jit/metainterp/optimizeopt/renamer.py
@@ -1,3 +1,5 @@
+from rpython.jit.metainterp import resoperation
+from rpython.jit.metainterp.resume import Snapshot
 
 class Renamer(object):
     def __init__(self):
diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py b/rpython/jit/metainterp/optimizeopt/schedule.py
--- a/rpython/jit/metainterp/optimizeopt/schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/schedule.py
@@ -15,11 +15,20 @@
         self.graph = graph
         self.oplist = []
         self.worklist = []
+        self.invariant_oplist = []
+        self.invariant_vector_vars = []
 
     def post_schedule(self):
         loop = self.graph.loop
         self.renamer.rename(loop.jump)
         loop.operations = self.oplist
+        loop.prefix = self.invariant_oplist
+        if len(self.invariant_vector_vars) > 0:
+            # TODO, accum?
+            args = loop.label.getarglist_copy() + self.invariant_vector_vars
+            opnum = loop.label.getopnum()
+            # TODO descr?
+            loop.prefix_label = loop.label.copy_and_change(opnum, args)
 
     def profitable(self):
         return self.costmodel.profitable()
@@ -67,7 +76,7 @@
             Keeps worklist sorted (see priority) """
         op = node.getoperation()
         state.renamer.rename(op)
-        state.unpack_from_vector(op)
+        state.ensure_args_unpacked(op)
         node.position = len(state.oplist)
         worklist = state.worklist
         for dep in node.provides()[:]: # COPY
@@ -105,6 +114,7 @@
                     if not node.emitted:
                         op = node.getoperation()
                         self.mark_emitted(node, state)
+                        state.seen[op] = None
                         state.oplist.append(op)
                 continue
 
@@ -120,52 +130,6 @@
             for node in state.graph.nodes:
                 assert node.emitted
 
-def vectorbox_outof_box(box, count=-1, size=-1, type='-'):
-    if box.type not in (FLOAT, INT):
-        raise AssertionError("cannot create vector box of type %s" % (box.type))
-    signed = True
-    if box.type == FLOAT:
-        signed = False
-    return BoxVector(box.type, 2, 8, signed)
-
-def packtype_outof_box(box):
-    if box.type == VECTOR:
-        return Type.of(box)
-    else:
-        if box.type == INT:
-            return Type(INT, 8, True, 2)
-        elif box.type == FLOAT:
-            return Type(FLOAT, 8, False, 2)
-    #
-    raise AssertionError("box %s not supported" % (box,))
-
-def vectorbox_clone_set(box, count=-1, size=-1, type='-', clone_signed=True, signed=False):
-    if count == -1:
-        count = box.getcount()
-    if size == -1:
-        size = box.getsize()
-    if type == '-':
-        type = box.gettype()
-    if clone_signed:
-        signed = box.getsigned()
-    return BoxVector(type, count, size, signed)
-
-def getpackopnum(type):
-    if type == INT:
-        return rop.VEC_INT_PACK
-    elif type == FLOAT:
-        return rop.VEC_FLOAT_PACK
-    #
-    raise AssertionError("getpackopnum type %s not supported" % (type,))
-
-def getunpackopnum(type):
-    if type == INT:
-        return rop.VEC_INT_UNPACK
-    elif type == FLOAT:
-        return rop.VEC_FLOAT_UNPACK
-    #
-    raise AssertionError("getunpackopnum type %s not supported" % (type,))
-
 #UNSIGNED_OPS = (rop.UINT_FLOORDIV, rop.UINT_RSHIFT,
 #                rop.UINT_LT, rop.UINT_LE,
 #                rop.UINT_GT, rop.UINT_GE)
@@ -275,27 +239,6 @@
     def bytecount(self):
         return self.count * self.type.bytecount()
 
-class DataTyper(object):
-
-    def infer_type(self, op):
-        # default action, pass through: find the first arg
-        # the output is the same as the first argument!
-        if op.returns_void() or op.argcount() == 0:
-            return
-        arg0 = op.getarg(0)
-        op.setdatatype(arg0.datatype, arg0.bytesize, arg0.signed)
-
-class PassFirstArg(TypeOutput):
-    def __init__(self):
-        pass
-
-def update_arg_in_vector_pos(state, argidx, box):
-    arguments = [op.getoperation().getarg(argidx) for op in self.getoperations()]
-    for i,arg in enumerate(arguments):
-        #if i >= box.count:
-        #    break
-        state.setvector_of_box(arg, i, box)
-
 class TypeRestrict(object):
     ANY_TYPE = -1
     ANY_SIZE = -1
@@ -320,7 +263,6 @@
         return True
 
 class trans(object):
-    #DT_PASS = DataTyper()
 
     TR_ANY = TypeRestrict()
     TR_ANY_FLOAT = TypeRestrict(FLOAT)
@@ -355,16 +297,9 @@
         rop.VEC_FLOAT_ABS:          [TR_ANY_FLOAT],
         rop.VEC_FLOAT_NEG:          [TR_ANY_FLOAT],
 
-        rop.VEC_RAW_LOAD_I:         [None, None, TR_ANY],
-        rop.VEC_RAW_LOAD_F:         [None, None, TR_ANY],
-        rop.VEC_GETARRAYITEM_RAW_I: [None, None, TR_ANY],
-        rop.VEC_GETARRAYITEM_RAW_F: [None, None, TR_ANY],
-        rop.VEC_GETARRAYITEM_GC_I:  [None, None, TR_ANY],
-        rop.VEC_GETARRAYITEM_GC_F:  [None, None, TR_ANY],
-
-        rop.VEC_RAW_STORE:          [None, None, None, TR_ANY],
-        rop.VEC_SETARRAYITEM_RAW:   [None, None, None, TR_ANY],
-        rop.VEC_SETARRAYITEM_GC:    [None, None, None, TR_ANY],
+        rop.VEC_RAW_STORE:          [None, None, TR_ANY],
+        rop.VEC_SETARRAYITEM_RAW:   [None, None, TR_ANY],
+        rop.VEC_SETARRAYITEM_GC:    [None, None, TR_ANY],
 
         rop.GUARD_TRUE:             [TR_ANY_INTEGER],
         rop.GUARD_FALSE:            [TR_ANY_INTEGER],
@@ -427,12 +362,13 @@
     #    a) expand vars/consts before the label and add as argument
     #    b) expand vars created in the loop body
     #
-    restrictions = trans.MAPPING[pack.leftmost().vector]
+    restrictions = trans.MAPPING.get(pack.leftmost().vector, [])
+    if not restrictions:
+        return
     for i,arg in enumerate(args):
         if i >= len(restrictions) or restrictions[i] is None:
             # ignore this argument
             continue
-        print "trans", i, "arg", arg
         if arg.returns_vector():
             continue
         pos, vecop = state.getvector_of_box(arg)
@@ -442,40 +378,32 @@
             continue
         args[i] = vecop
         assemble_scattered_values(state, pack, args, i)
-        position_values(state, pack, args, i, arg, pos)
+        position_values(state, pack, args, i, pos)
 
 def assemble_scattered_values(state, pack, args, index):
     vectors = pack.argument_vectors(state, pack, index)
     if len(vectors) > 1:
         # the argument is scattered along different vector boxes
-        value = gather(vectors, packable)
-        update_arg_in_vector_pos(state, i, value)
-        args[i] = value
-        #if packed < packable and len(vboxes) > 1:
-        #    # the argument is scattered along different vector boxes
-        #    args[i] = self.gather(vboxes, packable)
-        #    self.update_arg_in_vector_pos(i, args[i])
-        #    continue
+        args[index] = gather(state, vectors, pack.numops())
+        state.remember_args_in_vector(pack, index, args[index])
 
-def gather(self, vboxes, target_count): # packed < packable and packed < stride:
-    (_, box) = vboxes[0]
+def gather(state, vectors, count): # packed < packable and packed < stride:
+    (_, arg) = vectors[0]
     i = 1
-    while i < len(vboxes):
-        (box2_pos, box2) = vboxes[i]
-        if box.getcount() + box2.getcount() <= target_count:
-            box = self.package(box, box.getcount(),
-                               box2, box2_pos, box2.getcount())
+    while i < len(vectors):
+        (newarg_pos, newarg) = vectors[i]
+        if arg.count + newarg.count <= count:
+            arg = pack_into_vector(state, arg, arg.count, newarg, newarg_pos, newarg.count)
         i += 1
-    return box
+    return arg
 
-def position_values(state, pack, args, index, arg, pos):
-    pass
-        #if pos != 0:
-        #    # The vector box is at a position != 0 but it
-        #    # is required to be at position 0. Unpack it!
-        #    args[i] = self.unpack(vecop, pos, packed - pos, self.input_type)
-        #    self.update_arg_in_vector_pos(i, args[i])
-        #    continue
+def position_values(state, pack, args, index, position):
+    if position != 0:
+        # The vector box is at a position != 0 but it
+        # is required to be at position 0. Unpack it!
+        arg = args[index]
+        args[index] = unpack_from_vector(state, arg, position, arg.count - position)
+        state.remember_args_in_vector(pack, index, args[index])
 
         # convert size i64 -> i32, i32 -> i64, ...
         # TODO if self.bytesize > 0:
@@ -493,7 +421,7 @@
         #    # pos == 0 then it is already at the right place
         #    if pos != 0:
         #        args[i] = self.unpack(vecop, pos, packed - pos, self.input_type)
-        #        self.update_arg_in_vector_pos(i, args[i])
+        #        state.remember_args_in_vector(i, args[i])
         #        #self.update_input_output(self.pack)
         #        continue
         #    else:
@@ -504,13 +432,13 @@
         #if packed < packable and len(vboxes) > 1:
         #    # the argument is scattered along different vector boxes
         #    args[i] = self.gather(vboxes, packable)
-        #    self.update_arg_in_vector_pos(i, args[i])
+        #    state.remember_args_in_vector(i, args[i])
         #    continue
         #if pos != 0:
         #    # The vector box is at a position != 0 but it
         #    # is required to be at position 0. Unpack it!
         #    args[i] = self.unpack(vecop, pos, packed - pos, self.input_type)
-        #    self.update_arg_in_vector_pos(i, args[i])
+        #    state.remember_args_in_vector(i, args[i])
         #    continue
         ##
         #assert vecop is not None
@@ -551,53 +479,50 @@
     self.vecops.append(op)
     return vbox_cloned
 
-def unpack(self, vbox, index, count, arg_ptype):
+def unpack_from_vector(state, arg, index, count):
     """ Extract parts of the vector box into another vector box """
-    assert index < vbox.getcount()
-    assert index + count <= vbox.getcount()
+    print "unpack i", index, "c", count, "v", arg
     assert count > 0
-    vbox_cloned = vectorbox_clone_set(vbox, count=count)
-    opnum = getunpackopnum(vbox.gettype())
-    op = ResOperation(opnum, [vbox, ConstInt(index), ConstInt(count)], vbox_cloned)
-    self.costmodel.record_vector_unpack(vbox, index, count)
-    self.vecops.append(op)
-    #
-    return vbox_cloned
+    assert index + count <= arg.count
+    args = [arg, ConstInt(index), ConstInt(count)]
+    vecop = OpHelpers.create_vec_unpack(arg.type, args, arg.bytesize,
+                                        arg.signed, count)
+    state.costmodel.record_vector_unpack(arg, index, count)
+    state.oplist.append(vecop)
+    return vecop
 
-def package(self, tgt, tidx, src, sidx, scount):
+def pack_into_vector(state, tgt, tidx, src, sidx, scount):
     """ tgt = [1,2,3,4,_,_,_,_]
         src = [5,6,_,_]
         new_box = [1,2,3,4,5,6,_,_] after the operation, tidx=4, scount=2
     """
     assert sidx == 0 # restriction
-    count = tgt.getcount() + src.getcount()
-    new_box = vectorbox_clone_set(tgt, count=count)
-    opnum = getpackopnum(tgt.gettype())
-    op = ResOperation(opnum, [tgt, src, ConstInt(tidx), ConstInt(scount)], new_box)
-    self.vecops.append(op)
-    self.costmodel.record_vector_pack(src, sidx, scount)
+    newcount = tgt.count + scount
+    args = [tgt, src, ConstInt(tidx), ConstInt(scount)]
+    vecop = OpHelpers.create_vec_pack(tgt.type, args, tgt.bytesize, tgt.signed, newcount)
+    state.oplist.append(vecop)
+    state.costmodel.record_vector_pack(src, sidx, scount)
     if not we_are_translated():
-        self._check_vec_pack(op)
-    return new_box
+        _check_vec_pack(vecop)
+    return vecop
 
-def _check_vec_pack(self, op):
-    result = op
+def _check_vec_pack(op):
     arg0 = op.getarg(0)
     arg1 = op.getarg(1)
     index = op.getarg(2)
     count = op.getarg(3)
-    assert isinstance(result, BoxVector)
-    assert isinstance(arg0, BoxVector)
-    assert isinstance(index, ConstInt)
+    assert op.is_vector()
+    assert arg0.is_vector()
+    assert index.is_constant()
     assert isinstance(count, ConstInt)
-    assert arg0.getsize() == result.getsize()
-    if isinstance(arg1, BoxVector):
-        assert arg1.getsize() == result.getsize()
+    assert arg0.bytesize == op.bytesize
+    if arg1.is_vector():
+        assert arg1.bytesize == op.bytesize
     else:
         assert count.value == 1
-    assert index.value < result.getcount()
-    assert index.value + count.value <= result.getcount()
-    assert result.getcount() > arg0.getcount()
+    assert index.value < op.count
+    assert index.value + count.value <= op.count
+    assert op.count > arg0.count
 
 def expand(state, pack, args, arg, index):
     """ Expand a value into a vector box. useful for arith metic
@@ -610,7 +535,8 @@
     ops = state.invariant_oplist
     variables = state.invariant_vector_vars
     if not arg.is_constant() and arg not in state.inputargs:
-        ops = self.vecops
+        # cannot be created before the loop, expand inline
+        ops = state.oplist
         variables = None
 
     for i, node in enumerate(pack.operations):
@@ -620,29 +546,30 @@
         i += 1
     else:
         # note that heterogenous nodes are not yet tracked
-        already_expanded = expanded_map.get(arg, None)
-        if already_expanded:
-            return already_expanded
+        vecop = expanded_map.get(arg, None)
+        if vecop:
+            args[index] = vecop
+            return vecop
         vecop = OpHelpers.create_vec_expand(arg, op.bytesize, op.signed, pack.numops())
-        state.oplist.append(vecop)
+        ops.append(vecop)
         if variables is not None:
             variables.append(vecop)
         expanded_map[arg] = vecop
-        for i in range(vecop.count):
-            state.setvector_of_box(arg, i, vecop)
+        #for i in range(vecop.count):
+        #    state.setvector_of_box(arg, i, vecop)
         args[index] = vecop
         return vecop
 
     vecop = OpHelpers.create_vec(arg.type, left.bytesize, left.signed)
-    state.oplist.append(vecop)
+    ops.append(vecop)
     for i,node in enumerate(pack.operations):
         op = node.getoperation()
         arg = op.getarg(index)
         arguments = [vecop, arg, ConstInt(i), ConstInt(1)]
         vecop = OpHelpers.create_vec_pack(arg.type, arguments, left.bytesize,
                                           left.signed, vecop.count+1)
-        state.setvector_of_box(arg, i, vecop)
-        state.oplist.append(vecop)
+        #state.setvector_of_box(arg, i, vecop)
+        ops.append(vecop)
 
     if variables is not None:
         variables.append(vecop)
@@ -654,8 +581,6 @@
         self.box_to_vbox = {}
         self.cpu = cpu
         self.vec_reg_size = cpu.vector_register_size
-        self.invariant_oplist = []
-        self.invariant_vector_vars = []
         self.expanded_map = {}
         self.costmodel = costmodel
         self.inputargs = {}
@@ -666,7 +591,7 @@
 
     def post_schedule(self):
         loop = self.graph.loop
-        self.unpack_from_vector(loop.jump)
+        self.ensure_args_unpacked(loop.jump)
         SchedulerState.post_schedule(self)
 
         # add accumulation info to the descriptor
@@ -727,47 +652,41 @@
                         return True
         return False
 
-    def unpack_from_vector(self, op):
+    def ensure_args_unpacked(self, op):
         """ If a box is needed that is currently stored within a vector
             box, this utility creates a unpacking instruction.
         """
-        args = op.getarglist()
-
         # unpack for an immediate use
-        for i, arg in enumerate(op.getarglist()):
-            if not arg.is_constant():
-                argument = self._unpack_from_vector(i, arg)
-                if arg is not argument:
-                    op.setarg(i, argument)
-        if not op.returns_void():
-            self.seen[op] = None
+        for i, argument in enumerate(op.getarglist()):
+            if not argument.is_constant():
+                arg = self.ensure_unpacked(i, argument)
+                if argument is not arg:
+                    op.setarg(i, arg)
         # unpack for a guard exit
         if op.is_guard():
+            # could be moved to the guard exit
             fail_args = op.getfailargs()
-            for i, arg in enumerate(fail_args):
-                if arg and not arg.is_constant():
-                    argument = self._unpack_from_vector(i, arg)
-                    if arg is not argument:
-                        fail_args[i] = argument
+            for i, argument in enumerate(fail_args):
+                if argument and not argument.is_constant():
+                    arg = self.ensure_unpacked(i, argument)
+                    if argument is not arg:
+                        fail_arguments[i] = arg
 
-    def _unpack_from_vector(self, i, arg):
-        if arg in self.seen or arg.type == 'V':
+    def ensure_unpacked(self, index, arg):
+        if arg in self.seen or not arg.is_vector():
             return arg
-        (j, vbox) = self.getvector_of_box(arg)
-        if vbox:
-            if vbox in self.invariant_vector_vars:
+        (pos, var) = self.getvector_of_box(arg)
+        if var:
+            if var in self.invariant_vector_vars:
                 return arg
-            arg_cloned = arg.clonebox()
-            self.seen[arg_cloned] = None
-            self.renamer.start_renaming(arg, arg_cloned)
-            self.setvector_of_box(arg_cloned, j, vbox)
-            cj = ConstInt(j)
-            ci = ConstInt(1)
-            opnum = getunpackopnum(vbox.gettype())
-            unpack_op = ResOperation(opnum, [vbox, cj, ci], arg_cloned)
-            self.costmodel.record_vector_unpack(vbox, j, 1)
-            self.oplist.append(unpack_op)
-            return arg_cloned
+            args = [var, ConstInt(pos), ConstInt(1)]
+            vecop = OpHelpers.create_vec_unpack(var.type, args, var.bytesize,
+                                                var.signed, 1)
+            self.renamer.start_renaming(arg, vecop)
+            self.seen[vecop] = None
+            self.costmodel.record_vector_unpack(var, pos, 1)
+            self.oplist.append(vecop)
+            return vecop
         return arg
 
     def _prevent_signext(self, outsize, insize):
@@ -783,11 +702,24 @@
         assert not var.is_vector()
         self.box_to_vbox[var] = (off, vector)
 
+    def remember_args_in_vector(self, pack, index, box):
+        arguments = [op.getoperation().getarg(index) for op in pack.operations]
+        for i,arg in enumerate(arguments):
+            if i >= box.count:
+                break
+            self.setvector_of_box(arg, i, box)
+
+
 def opcount_filling_vector_register(pack, vec_reg_size):
     """ how many operations of that kind can one execute
         with a machine instruction of register size X?
     """
     op = pack.leftmost()
+    if op.returns_void():
+        assert op.is_primitive_store()
+        arg = op.getarg(2)
+        return vec_reg_size // arg.bytesize
+
     if op.is_typecast():
         if op.casts_down():
             return vec_reg_size // op.cast_from_bytesize()
@@ -806,13 +738,6 @@
         self.operations = ops
         self.accum = None
         self.update_pack_of_nodes()
-        # initializes the type
-        # TODO
-        #input_type, output_type = \
-        #    determine_input_output_types(origin_pack, lnode, forward)
-        #self.input_type = input_type
-        #self.output_type = output_type
-        #assert self.input_type is not None or self.output_type is not None
 
     def numops(self):
         return len(self.operations)
@@ -853,6 +778,11 @@
         """
         left = self.leftmost()
         if left.returns_void():
+            if left.is_primitive_store():
+                # make this case more general if it turns out this is
+                # not the only case where packs need to be trashed
+                indexarg = left.getarg(2)
+                return indexarg.bytesize * self.numops() - vec_reg_size
             return 0
         if self.numops() == 0:
             return -1
@@ -860,7 +790,9 @@
             # casting is special, often only takes a half full vector
             if left.casts_down():
                 # size is reduced
-                return left.cast_from_bytesize() * self.numops() - vec_reg_size
+                size = left.cast_input_bytesize(vec_reg_size)
+                import pdb; pdb.set_trace()
+                return left.cast_from_bytesize() * self.numops() - size
             else:
                 # size is increased
                 return left.cast_to_bytesize() * self.numops() - vec_reg_size
@@ -897,7 +829,8 @@
             oplist, newoplist = pack.slice_operations(vec_reg_size)
             pack.operations = oplist
             pack.update_pack_of_nodes()
-            assert pack.is_full(vec_reg_size)
+            if not pack.leftmost().is_typecast():
+                assert pack.is_full(vec_reg_size)
             #
             newpack = pack.clone(newoplist)
             load = newpack.pack_load(vec_reg_size)
@@ -1195,7 +1128,7 @@
 #            # box_pos == 0 then it is already at the right place
 #            if box_pos != 0:
#                args[i] = self.unpack(vbox, box_pos, packed - box_pos, self.input_type)
-#                self.update_arg_in_vector_pos(i, args[i])
+#                remember_args_in_vector(i, args[i])
 #                #self.update_input_output(self.pack)
 #                continue
 #            else:
@@ -1206,13 +1139,13 @@
 #        if packed < packable and len(vboxes) > 1:
 #            # the argument is scattered along different vector boxes
 #            args[i] = self.gather(vboxes, packable)
-#            self.update_arg_in_vector_pos(i, args[i])
+#            remember_args_in_vector(i, args[i])
 #            continue
 #        if box_pos != 0:
 #            # The vector box is at a position != 0 but it
 #            # is required to be at position 0. Unpack it!
#            args[i] = self.unpack(vbox, box_pos, packed - box_pos, self.input_type)
-#            self.update_arg_in_vector_pos(i, args[i])
+#            remember_args_in_vector(i, args[i])
 #            continue
 #            #self.update_input_output(self.pack)
 #        #
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_schedule.py b/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
@@ -82,13 +82,14 @@
         jitdriver_sd = FakeJitDriverStaticData()
         opt = VectorizingOptimizer(metainterp_sd, jitdriver_sd, 0)
         opt.packset = packset
-        if not prepend_invariant:
-            state.prepend_invariant_operations = lambda list, _: list
         opt.combine_packset()
         opt.schedule(state)
         # works for now. might be the wrong class?
         # wrap label + operations + jump it in tree loop otherwise
-        return state.graph.loop
+        loop = state.graph.loop
+        if prepend_invariant:
+            loop.operations = loop.prefix + loop.operations
+        return loop
 
 class Test(SchedulerBaseTest, LLtypeMixin):
 
@@ -358,13 +359,12 @@
         """, False)
         self.assert_equal(loop2, loop3)
 
-
     def test_split_load_store(self):
         loop1 = self.parse_trace("""
-        i10 = raw_load_f(p0, i1, descr=float)
-        i11 = raw_load_f(p0, i2, descr=float)
-        i12 = raw_load_f(p0, i3, descr=float)
-        i13 = raw_load_f(p0, i4, descr=float)
+        i10 = raw_load_i(p0, i1, descr=float)
+        i11 = raw_load_i(p0, i2, descr=float)
+        i12 = raw_load_i(p0, i3, descr=float)
+        i13 = raw_load_i(p0, i4, descr=float)
         raw_store(p0, i3, i10, descr=float)
         raw_store(p0, i4, i11, descr=float)
         """)
@@ -454,11 +454,12 @@
 
     def test_combine_packset_nearly_empty_pack(self):
         trace = self.parse_trace("""
-        i10 = int_add(i1, i3)
-        i11 = int_add(i2, i3)
+        i10 = int_add(i1, i1)
+        i11 = int_add(i2, i2)
+        i12 = int_add(i3, i3)
         """)
         pack = self.pack(trace, 0, 2)
         packset = FakePackSet([pack])
         packset.split_overloaded_packs()
-        assert len(packset.packs) == 0
+        assert len(packset.packs) == 1
 
diff --git a/rpython/jit/metainterp/optimizeopt/vector.py b/rpython/jit/metainterp/optimizeopt/vector.py
--- a/rpython/jit/metainterp/optimizeopt/vector.py
+++ b/rpython/jit/metainterp/optimizeopt/vector.py
@@ -21,8 +21,7 @@
         MemoryRef, Node, IndexVar)
 from rpython.jit.metainterp.optimizeopt.version import LoopVersionInfo
 from rpython.jit.metainterp.optimizeopt.schedule import (VecScheduleState,
-        Scheduler, Pack, Pair, AccumPair, vectorbox_outof_box, getpackopnum,
-        getunpackopnum)
+        Scheduler, Pack, Pair, AccumPair)
 from rpython.jit.metainterp.optimizeopt.guard import GuardStrengthenOpt
 from rpython.jit.metainterp.resoperation import (rop, ResOperation, GuardResOp, Accum)
 from rpython.rlib import listsort
@@ -45,6 +44,11 @@
     def operation_list(self):
         return [self.label] + self.operations + [self.jump]
 
+    def assemble_oplist(self):
+        oplist = self.prefix + [self.prefix_label] + \
+                 loop.operations + [loop.jump]
+        return oplist
+
 def optimize_vector(metainterp_sd, jitdriver_sd, warmstate, loop_info, loop_ops):
     """ Enter the world of SIMD. Bails if it cannot transform the trace. """
     user_code = not jitdriver_sd.vec and warmstate.vec_all
@@ -75,7 +79,7 @@
                       (opt.unroll_count+1, len(version.operations), 
len(loop.operations), nano))
         debug_stop("vec-opt-loop")
         #
-        return info, loop.operations + [loop.jump]
+        return info, loop.assemble_oplist()
     except NotAVectorizeableLoop:
         debug_stop("vec-opt-loop")
         # vectorization is not possible
@@ -625,7 +629,7 @@
         self.savings += -count
 
     def record_vector_pack(self, src, index, count):
-        if src.gettype() == FLOAT:
+        if src.datatype == FLOAT:
             if index == 1 and count == 1:
                 self.savings -= 2
                 return
@@ -826,6 +830,7 @@
 
     def split_overloaded_packs(self):
         newpacks = []
+        import pdb; pdb.set_trace()
         for i,pack in enumerate(self.packs):
             load = pack.pack_load(self.vec_reg_size)
             if load > Pack.FULL:
diff --git a/rpython/jit/metainterp/resoperation.py 
b/rpython/jit/metainterp/resoperation.py
--- a/rpython/jit/metainterp/resoperation.py
+++ b/rpython/jit/metainterp/resoperation.py
@@ -92,7 +92,13 @@
     return op
 
 def VecOperation(opnum, args, baseop, count, descr=None):
-    return VecOperationNew(opnum, args, baseop.datatype, baseop.bytesize, baseop.signed, count, descr)
+    datatype = baseop.datatype
+    bytesize = baseop.bytesize
+    if baseop.is_typecast():
+        ft,tt = baseop.cast_types()
+        datatype = tt
+        bytesize = baseop.cast_to_bytesize()
+    return VecOperationNew(opnum, args, datatype, bytesize, baseop.signed, count, descr)
 
 def VecOperationNew(opnum, args, datateyp, bytesize, signed, count, descr=None):
     op = ResOperation(opnum, args, descr)
@@ -184,7 +190,7 @@
     boolreflex = -1
     boolinverse = -1
     vector = -1 # -1 means, no vector equivalent, -2 it is a vector statement
-    casts = ('\x00', -1, '\x00', -1)
+    casts = ('\x00', -1, '\x00', -1, -1)
     count = -1 
 
     def getopnum(self):
@@ -271,7 +277,7 @@
                 memo[self] = num
             if self.is_vector():
                 assert isinstance(self, VectorOp)
-                sres = 'v%d[%dx%s%d] = ' % (num, self.count, self.datatype, self.bytesize * 8)
+                sres = self.vector_repr(num) + ' = '
             else:
                 sres = self.type + str(num) + ' = '
         #if self.result is not None:
@@ -302,8 +308,7 @@
             memo[self] = num
         if self.is_vector():
             assert isinstance(self, VectorOp)
-            return 'v%d[%dx%s%d]' % (num, self.count, self.datatype,
-                                     self.bytesize * 8)
+            return self.vector_repr(num)
         return self.type + str(num)
 
     def __repr__(self):
@@ -451,14 +456,17 @@
     def is_typecast(self):
         return False
 
+    def cast_count(self):
+        return self.casts[4]
+
     def cast_types(self):
         return self.casts[0], self.casts[2]
 
     def cast_to_bytesize(self):
-        return self.casts[1]
+        return self.casts[3]
 
     def cast_from_bytesize(self):
-        return self.casts[3]
+        return self.casts[1]
 
     def casts_up(self):
         return self.cast_to_bytesize() > self.cast_from_bytesize()
@@ -657,6 +665,11 @@
             return self.bytesize
         return (type, size)
 
+    def cast_input_bytesize(self, vec_reg_size):
+        count = vec_reg_size // self.cast_to_bytesize()
+        size = self.cast_from_bytesize() * self.count
+        return size
+
 class SignExtOp(object):
     _mixin_ = True
 
@@ -676,11 +689,18 @@
         arg = self.getarg(0)
         return arg.bytesize
 
+    def cast_count(self):
+        return self.casts[4]
+
+
 class VectorOp(object):
     _mixin_ = True
 
-    def repr_rpython(self):
-        return repr_rpython(self, 'bv')
+    def vector_repr(self, num):
+        if self.opnum in (rop.VEC_UNPACK_I, rop.VEC_UNPACK_F):
+            return self.type + str(num)
+        return 'v%d[%dx%s%d]' % (num, self.count, self.datatype,
+                                 self.bytesize * 8)
 
     def vector_bytesize(self):
         assert self.count > 0
@@ -812,7 +832,6 @@
         else:
             raise IndexError
 
-
 class BinaryOp(object):
     _mixin_ = True
     _arg0 = None
@@ -1597,3 +1616,13 @@
             opnum = rop.VEC_PACK_F
         return VecOperationNew(opnum, args, datatype, bytesize, signed, count)
 
+    @staticmethod
+    def create_vec_unpack(datatype, args, bytesize, signed, count):
+        if datatype == 'i':
+            opnum = rop.VEC_UNPACK_I
+        else:
+            assert datatype == 'f'
+            opnum = rop.VEC_UNPACK_F
+        return VecOperationNew(opnum, args, datatype, bytesize, signed, count)
+
+
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

[pypy-commit] pypy vecopt-merge: fighting with casting. not quite there, but soon scheduling tests should work

Reply via email to