[pypy-commit] pypy vecopt-merge: further poking the scheduler. resoperations are now fully typed. this makes all the transformation logic much easier and less code, first simple tests pass already

plan_rich Mon, 14 Sep 2015 10:23:45 -0700

Author: Richard Plangger <[email protected]>
Branch: vecopt-merge
Changeset: r79637:86dbbde6b191
Date: 2015-09-14 19:21 +0200
http://bitbucket.org/pypy/pypy/changeset/86dbbde6b191/


Log:    further poking the scheduler. resoperations are now fully typed.
        this makes all the transformation logic much easier and less code,
        first simple tests pass already

diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py b/rpython/jit/metainterp/optimizeopt/schedule.py
--- a/rpython/jit/metainterp/optimizeopt/schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/schedule.py
@@ -97,7 +97,6 @@
         """ Emit all the operations into the oplist parameter.
             Initiates the scheduling. """
         assert isinstance(state, SchedulerState)
-        import pdb; pdb.set_trace()
         while state.has_more():
             node = self.next(state)
             if node:
@@ -273,6 +272,37 @@
     #    return self.count
 
 
+
+class TypeOutput(object):
+    def __init__(self, type, count):
+        self.type = type
+        self.count = count
+
+
+    def bytecount(self):
+        return self.count * self.type.bytecount()
+
+class DataTyper(object):
+
+    def infer_type(self, op):
+        # default action, pass through: find the first arg
+        # the output is the same as the first argument!
+        if op.returns_void() or op.argcount() == 0:
+            return
+        arg0 = op.getarg(0)
+        op.setdatatype(arg0.datatype, arg0.bytesize, arg0.signed)
+
+class PassFirstArg(TypeOutput):
+    def __init__(self):
+        pass
+
+def update_arg_in_vector_pos(state, argidx, box):
+    arguments = [op.getoperation().getarg(argidx) for op in self.getoperations()]
+    for i,arg in enumerate(arguments):
+        #if i >= box.count:
+        #    break
+        state.setvector_of_box(arg, i, box)
+
 class TypeRestrict(object):
     ANY_TYPE = -1
     ANY_SIZE = -1
@@ -296,389 +326,433 @@
 
         return True
 
-class TypeOutput(object):
-    def __init__(self, type, count):
-        self.type = type
-        self.count = count
+class trans(object):
+    #DT_PASS = DataTyper()
 
+    TR_ANY = TypeRestrict()
+    TR_ANY_FLOAT = TypeRestrict(FLOAT)
+    TR_ANY_INTEGER = TypeRestrict(INT)
+    TR_FLOAT_2 = TypeRestrict(FLOAT, 4, 2)
+    TR_DOUBLE_2 = TypeRestrict(FLOAT, 8, 2)
+    TR_LONG = TypeRestrict(INT, 8, 2)
+    TR_INT_2 = TypeRestrict(INT, 4, 2)
 
-    def bytecount(self):
-        return self.count * self.type.bytecount()
+    #INT = OpToVectorOp((TR_ANY_INTEGER, TR_ANY_INTEGER), DT_PASS)
+    #FLOAT = OpToVectorOp((TR_ANY_FLOAT, TR_ANY_FLOAT), DT_PASS)
+    #FLOAT_UNARY = OpToVectorOp((TR_ANY_FLOAT,), DT_PASS)
+    #LOAD = LoadToVectorLoad()
+    #STORE = StoreToVectorStore()
+    #GUARD = PassThroughOp((TR_ANY_INTEGER,))
 
-class DataTyper(object):
+    # note that the following definition is x86 arch specific
+    MAPPING = {
+        rop.VEC_INT_ADD:            [TR_ANY_INTEGER, TR_ANY_INTEGER],
+        rop.VEC_INT_SUB:            [TR_ANY_INTEGER, TR_ANY_INTEGER],
+        rop.VEC_INT_MUL:            [TR_ANY_INTEGER, TR_ANY_INTEGER],
+        rop.VEC_INT_AND:            [TR_ANY_INTEGER, TR_ANY_INTEGER],
+        rop.VEC_INT_OR:             [TR_ANY_INTEGER, TR_ANY_INTEGER],
+        rop.VEC_INT_XOR:            [TR_ANY_INTEGER, TR_ANY_INTEGER],
+        rop.VEC_INT_EQ:             [TR_ANY_INTEGER, TR_ANY_INTEGER],
+        rop.VEC_INT_NE:             [TR_ANY_INTEGER, TR_ANY_INTEGER],
 
-    def infer_type(self, op):
-        # default action, pass through: find the first arg
-        # the output is the same as the first argument!
-        if op.returns_void() or op.argcount() == 0:
-            return
-        arg0 = op.getarg(0)
-        op.setdatatype(arg0.datatype, arg0.bytesize, arg0.signed)
+        rop.VEC_FLOAT_ADD:          [TR_ANY_FLOAT, TR_ANY_FLOAT],
+        rop.VEC_FLOAT_SUB:          [TR_ANY_FLOAT, TR_ANY_FLOAT],
+        rop.VEC_FLOAT_MUL:          [TR_ANY_FLOAT, TR_ANY_FLOAT],
+        rop.VEC_FLOAT_TRUEDIV:      [TR_ANY_FLOAT, TR_ANY_FLOAT],
+        rop.VEC_FLOAT_ABS:          [TR_ANY_FLOAT],
+        rop.VEC_FLOAT_NEG:          [TR_ANY_FLOAT],
 
-class PassFirstArg(TypeOutput):
-    def __init__(self):
-        pass
+        rop.VEC_RAW_LOAD_I:         [None, None, TR_ANY],
+        rop.VEC_RAW_LOAD_F:         [None, None, TR_ANY],
+        rop.VEC_GETARRAYITEM_RAW_I: [None, None, TR_ANY],
+        rop.VEC_GETARRAYITEM_RAW_F: [None, None, TR_ANY],
+        rop.VEC_GETARRAYITEM_GC_I:  [None, None, TR_ANY],
+        rop.VEC_GETARRAYITEM_GC_F:  [None, None, TR_ANY],
+
+        rop.VEC_RAW_STORE:          [None, None, None, TR_ANY],
+        rop.VEC_SETARRAYITEM_RAW:   [None, None, None, TR_ANY],
+        rop.VEC_SETARRAYITEM_GC:    [None, None, None, TR_ANY],
+
+        rop.GUARD_TRUE:             [TR_ANY_INTEGER],
+        rop.GUARD_FALSE:            [TR_ANY_INTEGER],
+
+        ## irregular
+        rop.VEC_INT_SIGNEXT:        [TR_ANY_INTEGER],
+
+        rop.VEC_CAST_FLOAT_TO_SINGLEFLOAT:  [TR_DOUBLE_2],
+        rop.VEC_CAST_SINGLEFLOAT_TO_FLOAT:  [TR_FLOAT_2],
+        rop.VEC_CAST_FLOAT_TO_INT:          [TR_DOUBLE_2],
+        rop.VEC_CAST_INT_TO_FLOAT:          [TR_INT_2],
+
+        rop.VEC_FLOAT_EQ:           [TR_ANY_FLOAT,TR_ANY_FLOAT],
+        rop.VEC_FLOAT_NE:           [TR_ANY_FLOAT,TR_ANY_FLOAT],
+        rop.VEC_INT_IS_TRUE:        [TR_ANY_INTEGER,TR_ANY_INTEGER],
+    }
+
+    # TODO?
+    UNSIGNED_OPS = (rop.UINT_FLOORDIV, rop.UINT_RSHIFT,
+                    rop.UINT_LT, rop.UINT_LE,
+                    rop.UINT_GT, rop.UINT_GE)
+
+def turn_to_vector(state, pack):
+    """ Turn a pack into a vector instruction """
+    #
+    # TODO self.check_if_pack_supported(pack)
+    op = pack.leftmost()
+    args = op.getarglist()
+    prepare_arguments(state, pack, args)
+    vop = VecOperation(op.vector, args, op, pack.numops(), op.getdescr())
+    for i,node in enumerate(pack.operations):
+        op = node.getoperation()
+        state.setvector_of_box(op,i,vop)
+    #
+    if op.is_guard():
+        assert isinstance(op, GuardResOp)
+        assert isinstance(vop, GuardResOp)
+        vop.setfailargs(op.getfailargs())
+        vop.rd_snapshot = op.rd_snapshot
+    state.costmodel.record_pack_savings(pack, pack.numops())
+    #
+    if pack.is_accumulating():
+        box = oplist[position].result
+        assert box is not None
+        for node in pack.operations:
+            op = node.getoperation()
+            assert not op.returns_void()
+            state.renamer.start_renaming(op, box)
+    #
+    state.oplist.append(vop)
+
+
+def prepare_arguments(state, pack, args):
+    # Transforming one argument to a vector box argument
+    # The following cases can occur:
+    # 1) argument is present in the box_to_vbox map.
+    #    a) vector can be reused immediatly (simple case)
+    #    b) an operation forces the unpacking of a vector
+    # 2) argument is not known to reside in a vector
+    #    a) expand vars/consts before the label and add as argument
+    #    b) expand vars created in the loop body
+    #
+    restrictions = trans.MAPPING[pack.leftmost().vector]
+    for i,arg in enumerate(args):
+        if i >= len(restrictions) or restrictions[i] is None:
+            # ignore this argument
+            continue
+        print "trans", i, "arg", arg
+        if arg.returns_vector():
+            continue
+        pos, vecop = state.getvector_of_box(arg)
+        if not vecop:
+            # 2) constant/variable expand this box
+            # TODO just as one function call
+            vecop = self.expand(arg, i)
+            state.setvector_of_box(arg, 0, vecop)
+            pos = 0
+            continue
+        args[i] = vecop
+        assemble_scattered_values(state, pack, args, i)
+        position_values(state, pack, args, i, arg, pos)
+
+def assemble_scattered_values(state, pack, args, index):
+    vectors = pack.argument_vectors(state, pack, index)
+    if len(vectors) > 1:
+        # the argument is scattered along different vector boxes
+        value = gather(vectors, packable)
+        update_arg_in_vector_pos(state, i, value)
+        args[i] = value
+        #if packed < packable and len(vboxes) > 1:
+        #    # the argument is scattered along different vector boxes
+        #    args[i] = self.gather(vboxes, packable)
+        #    self.update_arg_in_vector_pos(i, args[i])
+        #    continue
+
+def gather(self, vboxes, target_count): # packed < packable and packed < stride:
+    (_, box) = vboxes[0]
+    i = 1
+    while i < len(vboxes):
+        (box2_pos, box2) = vboxes[i]
+        if box.getcount() + box2.getcount() <= target_count:
+            box = self.package(box, box.getcount(),
+                               box2, box2_pos, box2.getcount())
+        i += 1
+    return box
+
+def position_values(state, pack, args, index, arg, pos):
+    pass
+        #if pos != 0:
+        #    # The vector box is at a position != 0 but it
+        #    # is required to be at position 0. Unpack it!
+        #    args[i] = self.unpack(vecop, pos, packed - pos, self.input_type)
+        #    self.update_arg_in_vector_pos(i, args[i])
+        #    continue
+
+        # convert size i64 -> i32, i32 -> i64, ...
+        # TODO if self.bytesize > 0:
+        #   determine_trans(
+        #   self.input_type.getsize() != vecop.getsize():
+        #    vecop = self.extend(vecop, self.input_type)
+
+        # use the input as an indicator for the pack type
+        #packable = vecop.maximum_numops()
+        #packed = vecop.count
+        #assert packed >= 0
+        #assert packable >= 0
+        #if packed > packable:
+        #    # the argument has more items than the operation is able to process!
+        #    # pos == 0 then it is already at the right place
+        #    if pos != 0:
+        #        args[i] = self.unpack(vecop, pos, packed - pos, self.input_type)
+        #        self.update_arg_in_vector_pos(i, args[i])
+        #        #self.update_input_output(self.pack)
+        #        continue
+        #    else:
+        #        assert vecop is not None
+        #        args[i] = vecop
+        #        continue
+        #vboxes = self.vector_boxes_for_args(i)
+        #if packed < packable and len(vboxes) > 1:
+        #    # the argument is scattered along different vector boxes
+        #    args[i] = self.gather(vboxes, packable)
+        #    self.update_arg_in_vector_pos(i, args[i])
+        #    continue
+        #if pos != 0:
+        #    # The vector box is at a position != 0 but it
+        #    # is required to be at position 0. Unpack it!
+        #    args[i] = self.unpack(vecop, pos, packed - pos, self.input_type)
+        #    self.update_arg_in_vector_pos(i, args[i])
+        #    continue
+        ##
+        #assert vecop is not None
+        #args[i] = vecop
+
+def before_argument_transform(self, args):
+    pass
+
+def check_if_pack_supported(self, pack):
+    op0 = pack.operations[0].getoperation()
+    if self.input_type is None:
+        # must be a load/guard op
+        return
+    insize = self.input_type.getsize()
+    if op0.is_typecast():
+        # prohibit the packing of signext calls that
+        # cast to int16/int8.
+        _, outsize = op0.cast_to()
+        self.sched_data._prevent_signext(outsize, insize)
+    if op0.getopnum() == rop.INT_MUL:
+        if insize == 8 or insize == 1:
+            # see assembler for comment why
+            raise NotAProfitableLoop
+
+#def transform_result(self, result):
+#    if result is None:
+#        return None
+#    vbox = self.new_result_vector_box()
+#    #
+#    # mark the position and the vbox in the hash
+#    for i, node in enumerate(self.getoperations()):
+#        if i >= vbox.getcount():
+#            break
+#        op = node.getoperation()
+#        self.sched_data.setvector_of_box(op, i, vbox)
+#    return vbox
+
+#def new_result_vector_box(self):
+#    type = self.output_type.gettype()
+#    size = self.output_type.getsize()
+#    count = min(self.output_type.getcount(), len(self.pack.operations))
+#    signed = self.output_type.signed
+#    return BoxVector(type, count, size, signed)
+
+#def getoperations(self):
+#    return self.pack.operations
+
+#def transform_arguments(self, args):
+#    """ Transforming one argument to a vector box argument
+#        The following cases can occur:
+#        1) argument is present in the box_to_vbox map.
+#           a) vector can be reused immediatly (simple case)
+#           b) vector is to big
+#           c) vector is to small
+#        2) argument is not known to reside in a vector
+#           a) expand vars/consts before the label and add as argument
+#           b) expand vars created in the loop body
+#    """
+#    for i,arg in enumerate(args):
+#        if arg.returns_vector():
+#            continue
+#        if not self.is_vector_arg(i):
+#            continue
+#        box_pos, vbox = self.sched_data.getvector_of_box(arg)
+#        if not vbox:
+#            # constant/variable expand this box
+#            vbox = self.expand(arg, i)
+#            self.sched_data.setvector_of_box(arg, 0, vbox)
+#            box_pos = 0
+#        # convert size i64 -> i32, i32 -> i64, ...
+#        if self.input_type.getsize() > 0 and \
+#           self.input_type.getsize() != vbox.getsize():
+#            vbox = self.extend(vbox, self.input_type)
+
+#        # use the input as an indicator for the pack type
+#        packable = self.input_type.getcount()
+#        packed = vbox.getcount()
+#        assert packed >= 0
+#        assert packable >= 0
+#        if packed > packable:
+#            # the argument has more items than the operation is able to process!
+#            # box_pos == 0 then it is already at the right place
+#            if box_pos != 0:
+#                args[i] = self.unpack(vbox, box_pos, packed - box_pos, self.input_type)
+#                self.update_arg_in_vector_pos(i, args[i])
+#                #self.update_input_output(self.pack)
+#                continue
+#            else:
+#                assert vbox is not None
+#                args[i] = vbox
+#                continue
+#        vboxes = self.vector_boxes_for_args(i)
+#        if packed < packable and len(vboxes) > 1:
+#            # the argument is scattered along different vector boxes
+#            args[i] = self.gather(vboxes, packable)
+#            self.update_arg_in_vector_pos(i, args[i])
+#            continue
+#        if box_pos != 0:
+#            # The vector box is at a position != 0 but it
+#            # is required to be at position 0. Unpack it!
+#            args[i] = self.unpack(vbox, box_pos, packed - box_pos, self.input_type)
+#            self.update_arg_in_vector_pos(i, args[i])
+#            continue
+#            #self.update_input_output(self.pack)
+#        #
+#        assert vbox is not None
+#        args[i] = vbox
+
+def extend(self, vbox, newtype):
+    assert vbox.gettype() == newtype.gettype()
+    if vbox.gettype() == INT:
+        return self.extend_int(vbox, newtype)
+    else:
+        raise NotImplementedError("cannot yet extend float")
+
+def extend_int(self, vbox, newtype):
+    vbox_cloned = newtype.new_vector_box(vbox.getcount())
+    self.sched_data._prevent_signext(newtype.getsize(), vbox.getsize())
+    newsize = newtype.getsize()
+    assert newsize > 0
+    op = ResOperation(rop.VEC_INT_SIGNEXT, 
+                      [vbox, ConstInt(newsize)],
+                      vbox_cloned)
+    self.costmodel.record_cast_int(vbox.getsize(), newtype.getsize(), vbox.getcount())
+    self.vecops.append(op)
+    return vbox_cloned
+
+def unpack(self, vbox, index, count, arg_ptype):
+    """ Extract parts of the vector box into another vector box """
+    assert index < vbox.getcount()
+    assert index + count <= vbox.getcount()
+    assert count > 0
+    vbox_cloned = vectorbox_clone_set(vbox, count=count)
+    opnum = getunpackopnum(vbox.gettype())
+    op = ResOperation(opnum, [vbox, ConstInt(index), ConstInt(count)], vbox_cloned)
+    self.costmodel.record_vector_unpack(vbox, index, count)
+    self.vecops.append(op)
+    #
+    return vbox_cloned
+
+def package(self, tgt, tidx, src, sidx, scount):
+    """ tgt = [1,2,3,4,_,_,_,_]
+        src = [5,6,_,_]
+        new_box = [1,2,3,4,5,6,_,_] after the operation, tidx=4, scount=2
+    """
+    assert sidx == 0 # restriction
+    count = tgt.getcount() + src.getcount()
+    new_box = vectorbox_clone_set(tgt, count=count)
+    opnum = getpackopnum(tgt.gettype())
+    op = ResOperation(opnum, [tgt, src, ConstInt(tidx), ConstInt(scount)], new_box)
+    self.vecops.append(op)
+    self.costmodel.record_vector_pack(src, sidx, scount)
+    if not we_are_translated():
+        self._check_vec_pack(op)
+    return new_box
+
+def _check_vec_pack(self, op):
+    result = op
+    arg0 = op.getarg(0)
+    arg1 = op.getarg(1)
+    index = op.getarg(2)
+    count = op.getarg(3)
+    assert isinstance(result, BoxVector)
+    assert isinstance(arg0, BoxVector)
+    assert isinstance(index, ConstInt)
+    assert isinstance(count, ConstInt)
+    assert arg0.getsize() == result.getsize()
+    if isinstance(arg1, BoxVector):
+        assert arg1.getsize() == result.getsize()
+    else:
+        assert count.value == 1
+    assert index.value < result.getcount()
+    assert index.value + count.value <= result.getcount()
+    assert result.getcount() > arg0.getcount()
+
+def expand(self, arg, argidx):
+    """ Expand a value into a vector box. useful for arith metic
+        of one vector with a scalar (either constant/varialbe)
+    """
+    elem_count = self.input_type.getcount()
+    vbox = self.input_type.new_vector_box(elem_count)
+    box_type = arg.type
+    expanded_map = self.sched_data.expanded_map
+    # note that heterogenous nodes are not yet tracked
+    already_expanded = expanded_map.get(arg, None)
+    if already_expanded:
+        return already_expanded
+
+    ops = self.sched_data.invariant_oplist
+    variables = self.sched_data.invariant_vector_vars
+    if isinstance(arg,Box) and arg not in self.sched_data.inputargs:
+        ops = self.vecops
+        variables = None
+    if isinstance(arg, BoxVector):
+        box_type = arg.gettype()
+
+    for i, node in enumerate(self.getoperations()):
+        op = node.getoperation()
+        if not arg.same_box(op.getarg(argidx)):
+            break
+        i += 1
+    else:
+        expand_opnum = getexpandopnum(box_type)
+        op = ResOperation(expand_opnum, [arg, ConstInt(vbox.item_count)], vbox)
+        ops.append(op)
+        if variables is not None:
+            variables.append(vbox)
+        expanded_map[arg] = vbox
+        return vbox
+
+    op = ResOperation(rop.VEC_BOX, [ConstInt(elem_count)], vbox)
+    ops.append(op)
+    opnum = getpackopnum(arg.type)
+    for i,node in enumerate(self.getoperations()):
+        op = node.getoperation()
+        arg = op.getarg(argidx)
+        new_box = vbox.clonebox()
+        ci = ConstInt(i)
+        c1 = ConstInt(1)
+        op = ResOperation(opnum, [vbox,arg,ci,c1], new_box)
+        vbox = new_box
+        ops.append(op)
+
+    if variables is not None:
+        variables.append(vbox)
+    return vbox
 
 class OpToVectorOp(object):
-    def __init__(self, restrictargs, typeoutput):
-        self.args = list(restrictargs) # do not use a tuple. rpython cannot union
-        self.out = typeoutput
-
-    def as_vector_operation(self, state, pack):
-        #
-        # TODO self.check_if_pack_supported(pack)
-        op = pack.leftmost()
-        args = op.getarglist()
-        self.prepare_arguments(state, op.getarglist())
-        vop = VecOperation(op.vector, args, op, pack.numops(), op.getdescr())
-        #
-        if op.is_guard():
-            assert isinstance(op, GuardResOp)
-            assert isinstance(vop, GuardResOp)
-            vop.setfailargs(op.getfailargs())
-            vop.rd_snapshot = op.rd_snapshot
-        state.costmodel.record_pack_savings(pack, pack.numops())
-        #
-        if pack.is_accumulating():
-            box = oplist[position].result
-            assert box is not None
-            for node in pack.operations:
-                op = node.getoperation()
-                assert not op.returns_void()
-                scheduler.renamer.start_renaming(op, box)
-        #
-        state.oplist.append(vop)
-
-    def prepare_arguments(self, state, args):
-        self.before_argument_transform(args)
-        # Transforming one argument to a vector box argument
-        # The following cases can occur:
-        # 1) argument is present in the box_to_vbox map.
-        #    a) vector can be reused immediatly (simple case)
-        #    b) vector is to big
-        #    c) vector is to small
-        # 2) argument is not known to reside in a vector
-        #    a) expand vars/consts before the label and add as argument
-        #    b) expand vars created in the loop body
-        #
-        for i,arg in enumerate(args):
-            if arg.returns_vector():
-                continue
-            if not self.transform_arg_at(i):
-                continue
-            box_pos, vbox = state.getvector_of_box(arg)
-            if not vbox:
-                # 2) constant/variable expand this box
-                vbox = self.expand(arg, i)
-                self.sched_data.setvector_of_box(arg, 0, vbox)
-                box_pos = 0
-            # convert size i64 -> i32, i32 -> i64, ...
-            if self.input_type.getsize() > 0 and \
-               self.input_type.getsize() != vbox.getsize():
-                vbox = self.extend(vbox, self.input_type)
-
-            # use the input as an indicator for the pack type
-            packable = self.input_type.getcount()
-            packed = vbox.getcount()
-            assert packed >= 0
-            assert packable >= 0
-            if packed > packable:
-                # the argument has more items than the operation is able to process!
-                # box_pos == 0 then it is already at the right place
-                if box_pos != 0:
-                    args[i] = self.unpack(vbox, box_pos, packed - box_pos, self.input_type)
-                    self.update_arg_in_vector_pos(i, args[i])
-                    #self.update_input_output(self.pack)
-                    continue
-                else:
-                    assert vbox is not None
-                    args[i] = vbox
-                    continue
-            vboxes = self.vector_boxes_for_args(i)
-            if packed < packable and len(vboxes) > 1:
-                # the argument is scattered along different vector boxes
-                args[i] = self.gather(vboxes, packable)
-                self.update_arg_in_vector_pos(i, args[i])
-                continue
-            if box_pos != 0:
-                # The vector box is at a position != 0 but it
-                # is required to be at position 0. Unpack it!
-                args[i] = self.unpack(vbox, box_pos, packed - box_pos, self.input_type)
-                self.update_arg_in_vector_pos(i, args[i])
-                continue
-                #self.update_input_output(self.pack)
-            #
-            assert vbox is not None
-            args[i] = vbox
-
-    def before_argument_transform(self, args):
+    def __init__(self): #, restrictargs, typeoutput):
         pass
-
-    def check_if_pack_supported(self, pack):
-        op0 = pack.operations[0].getoperation()
-        if self.input_type is None:
-            # must be a load/guard op
-            return
-        insize = self.input_type.getsize()
-        if op0.casts_box():
-            # prohibit the packing of signext calls that
-            # cast to int16/int8.
-            _, outsize = op0.cast_to()
-            self.sched_data._prevent_signext(outsize, insize)
-        if op0.getopnum() == rop.INT_MUL:
-            if insize == 8 or insize == 1:
-                # see assembler for comment why
-                raise NotAProfitableLoop
-
-    def transform_result(self, result):
-        if result is None:
-            return None
-        vbox = self.new_result_vector_box()
-        #
-        # mark the position and the vbox in the hash
-        for i, node in enumerate(self.getoperations()):
-            if i >= vbox.getcount():
-                break
-            op = node.getoperation()
-            self.sched_data.setvector_of_box(op, i, vbox)
-        return vbox
-
-    def new_result_vector_box(self):
-        type = self.output_type.gettype()
-        size = self.output_type.getsize()
-        count = min(self.output_type.getcount(), len(self.pack.operations))
-        signed = self.output_type.signed
-        return BoxVector(type, count, size, signed)
-
-    def getoperations(self):
-        return self.pack.operations
-
-    def transform_arguments(self, args):
-        """ Transforming one argument to a vector box argument
-            The following cases can occur:
-            1) argument is present in the box_to_vbox map.
-               a) vector can be reused immediatly (simple case)
-               b) vector is to big
-               c) vector is to small
-            2) argument is not known to reside in a vector
-               a) expand vars/consts before the label and add as argument
-               b) expand vars created in the loop body
-        """
-        for i,arg in enumerate(args):
-            if arg.returns_vector():
-                continue
-            if not self.is_vector_arg(i):
-                continue
-            box_pos, vbox = self.sched_data.getvector_of_box(arg)
-            if not vbox:
-                # constant/variable expand this box
-                vbox = self.expand(arg, i)
-                self.sched_data.setvector_of_box(arg, 0, vbox)
-                box_pos = 0
-            # convert size i64 -> i32, i32 -> i64, ...
-            if self.input_type.getsize() > 0 and \
-               self.input_type.getsize() != vbox.getsize():
-                vbox = self.extend(vbox, self.input_type)
-
-            # use the input as an indicator for the pack type
-            packable = self.input_type.getcount()
-            packed = vbox.getcount()
-            assert packed >= 0
-            assert packable >= 0
-            if packed > packable:
-                # the argument has more items than the operation is able to process!
-                # box_pos == 0 then it is already at the right place
-                if box_pos != 0:
-                    args[i] = self.unpack(vbox, box_pos, packed - box_pos, self.input_type)
-                    self.update_arg_in_vector_pos(i, args[i])
-                    #self.update_input_output(self.pack)
-                    continue
-                else:
-                    assert vbox is not None
-                    args[i] = vbox
-                    continue
-            vboxes = self.vector_boxes_for_args(i)
-            if packed < packable and len(vboxes) > 1:
-                # the argument is scattered along different vector boxes
-                args[i] = self.gather(vboxes, packable)
-                self.update_arg_in_vector_pos(i, args[i])
-                continue
-            if box_pos != 0:
-                # The vector box is at a position != 0 but it
-                # is required to be at position 0. Unpack it!
-                args[i] = self.unpack(vbox, box_pos, packed - box_pos, self.input_type)
-                self.update_arg_in_vector_pos(i, args[i])
-                continue
-                #self.update_input_output(self.pack)
-            #
-            assert vbox is not None
-            args[i] = vbox
-
-    def gather(self, vboxes, target_count): # packed < packable and packed < stride:
-        (_, box) = vboxes[0]
-        i = 1
-        while i < len(vboxes):
-            (box2_pos, box2) = vboxes[i]
-            if box.getcount() + box2.getcount() <= target_count:
-                box = self.package(box, box.getcount(),
-                                   box2, box2_pos, box2.getcount())
-            i += 1
-        return box
-
-    def update_arg_in_vector_pos(self, argidx, box):
-        arguments = [op.getoperation().getarg(argidx) for op in self.getoperations()]
-        for i,arg in enumerate(arguments):
-            if i >= box.getcount():
-                break
-            self.sched_data.setvector_of_box(arg, i, box)
-
-    def vector_boxes_for_args(self, index):
-        args = [op.getoperation().getarg(index) for op in self.getoperations()]
-        vboxes = []
-        last_vbox = None
-        for arg in args:
-            pos, vbox = self.sched_data.getvector_of_box(arg)
-            if vbox is not last_vbox and vbox is not None:
-                vboxes.append((pos, vbox))
-                last_vbox = vbox
-        return vboxes
-
-
-    def extend(self, vbox, newtype):
-        assert vbox.gettype() == newtype.gettype()
-        if vbox.gettype() == INT:
-            return self.extend_int(vbox, newtype)
-        else:
-            raise NotImplementedError("cannot yet extend float")
-
-    def extend_int(self, vbox, newtype):
-        vbox_cloned = newtype.new_vector_box(vbox.getcount())
-        self.sched_data._prevent_signext(newtype.getsize(), vbox.getsize())
-        newsize = newtype.getsize()
-        assert newsize > 0
-        op = ResOperation(rop.VEC_INT_SIGNEXT, 
-                          [vbox, ConstInt(newsize)],
-                          vbox_cloned)
-        self.costmodel.record_cast_int(vbox.getsize(), newtype.getsize(), vbox.getcount())
-        self.vecops.append(op)
-        return vbox_cloned
-
-    def unpack(self, vbox, index, count, arg_ptype):
-        """ Extract parts of the vector box into another vector box """
-        assert index < vbox.getcount()
-        assert index + count <= vbox.getcount()
-        assert count > 0
-        vbox_cloned = vectorbox_clone_set(vbox, count=count)
-        opnum = getunpackopnum(vbox.gettype())
-        op = ResOperation(opnum, [vbox, ConstInt(index), ConstInt(count)], vbox_cloned)
-        self.costmodel.record_vector_unpack(vbox, index, count)
-        self.vecops.append(op)
-        #
-        return vbox_cloned
-
-    def package(self, tgt, tidx, src, sidx, scount):
-        """ tgt = [1,2,3,4,_,_,_,_]
-            src = [5,6,_,_]
-            new_box = [1,2,3,4,5,6,_,_] after the operation, tidx=4, scount=2
-        """
-        assert sidx == 0 # restriction
-        count = tgt.getcount() + src.getcount()
-        new_box = vectorbox_clone_set(tgt, count=count)
-        opnum = getpackopnum(tgt.gettype())
-        op = ResOperation(opnum, [tgt, src, ConstInt(tidx), ConstInt(scount)], new_box)
-        self.vecops.append(op)
-        self.costmodel.record_vector_pack(src, sidx, scount)
-        if not we_are_translated():
-            self._check_vec_pack(op)
-        return new_box
-
-    def _check_vec_pack(self, op):
-        result = op
-        arg0 = op.getarg(0)
-        arg1 = op.getarg(1)
-        index = op.getarg(2)
-        count = op.getarg(3)
-        assert isinstance(result, BoxVector)
-        assert isinstance(arg0, BoxVector)
-        assert isinstance(index, ConstInt)
-        assert isinstance(count, ConstInt)
-        assert arg0.getsize() == result.getsize()
-        if isinstance(arg1, BoxVector):
-            assert arg1.getsize() == result.getsize()
-        else:
-            assert count.value == 1
-        assert index.value < result.getcount()
-        assert index.value + count.value <= result.getcount()
-        assert result.getcount() > arg0.getcount()
-
-    def expand(self, arg, argidx):
-        """ Expand a value into a vector box. useful for arith metic
-            of one vector with a scalar (either constant/varialbe)
-        """
-        elem_count = self.input_type.getcount()
-        vbox = self.input_type.new_vector_box(elem_count)
-        box_type = arg.type
-        expanded_map = self.sched_data.expanded_map
-        # note that heterogenous nodes are not yet tracked
-        already_expanded = expanded_map.get(arg, None)
-        if already_expanded:
-            return already_expanded
-
-        ops = self.sched_data.invariant_oplist
-        variables = self.sched_data.invariant_vector_vars
-        if isinstance(arg,Box) and arg not in self.sched_data.inputargs:
-            ops = self.vecops
-            variables = None
-        if isinstance(arg, BoxVector):
-            box_type = arg.gettype()
-
-        for i, node in enumerate(self.getoperations()):
-            op = node.getoperation()
-            if not arg.same_box(op.getarg(argidx)):
-                break
-            i += 1
-        else:
-            expand_opnum = getexpandopnum(box_type)
-            op = ResOperation(expand_opnum, [arg, ConstInt(vbox.item_count)], vbox)
-            ops.append(op)
-            if variables is not None:
-                variables.append(vbox)
-            expanded_map[arg] = vbox
-            return vbox
-
-        op = ResOperation(rop.VEC_BOX, [ConstInt(elem_count)], vbox)
-        ops.append(op)
-        opnum = getpackopnum(arg.type)
-        for i,node in enumerate(self.getoperations()):
-            op = node.getoperation()
-            arg = op.getarg(argidx)
-            new_box = vbox.clonebox()
-            ci = ConstInt(i)
-            c1 = ConstInt(1)
-            op = ResOperation(opnum, [vbox,arg,ci,c1], new_box)
-            vbox = new_box
-            ops.append(op)
-
-        if variables is not None:
-            variables.append(vbox)
-        return vbox
-
-    def transform_arg_at(self, i):
-        if i < 0 or i >= len(self.args):
-            return False
-        return self.args[i] is not None
-
-    def get_output_type_given(self, input_type, op):
-        return input_type
-
-    def get_input_type_given(self, output_type, op):
-        return output_type
-
-    def force_input(self, ptype):
-        """ Some operations require a specific count/size,
-            they can force the input type here!
-        """
-        return ptype
+        #self.args = list(restrictargs) # do not use a tuple. rpython cannot union
+        #self.out = typeoutput
 
 class OpToVectorOpConv(OpToVectorOp):
     def __init__(self, intype, outtype):
@@ -790,97 +864,31 @@
         raise AssertionError("cannot infer input type from output type")
 
 
-class trans(object):
-    DT_PASS = DataTyper()
 
-    TR_ANY_FLOAT = TypeRestrict(FLOAT)
-    TR_ANY_INTEGER = TypeRestrict(INT)
-    TR_FLOAT_2 = TypeRestrict(FLOAT, 4, 2)
-    TR_DOUBLE_2 = TypeRestrict(FLOAT, 8, 2)
-    TR_LONG = TypeRestrict(INT, 8, 2)
-    TR_INT_2 = TypeRestrict(INT, 4, 2)
-
-    INT = OpToVectorOp((TR_ANY_INTEGER, TR_ANY_INTEGER), DT_PASS)
-    FLOAT = OpToVectorOp((TR_ANY_FLOAT, TR_ANY_FLOAT), DT_PASS)
-    FLOAT_UNARY = OpToVectorOp((TR_ANY_FLOAT,), DT_PASS)
-    LOAD = LoadToVectorLoad()
-    STORE = StoreToVectorStore()
-    GUARD = PassThroughOp((TR_ANY_INTEGER,))
-
-    # note that the following definition is x86 arch specific
-    MAPPING = {
-        rop.VEC_INT_ADD:            INT,
-        rop.VEC_INT_SUB:            INT,
-        rop.VEC_INT_MUL:            INT,
-        rop.VEC_INT_AND:            INT,
-        rop.VEC_INT_OR:             INT,
-        rop.VEC_INT_XOR:            INT,
-        rop.VEC_INT_EQ:             INT,
-        rop.VEC_INT_NE:             INT,
-
-        rop.VEC_FLOAT_ADD:          FLOAT,
-        rop.VEC_FLOAT_SUB:          FLOAT,
-        rop.VEC_FLOAT_MUL:          FLOAT,
-        rop.VEC_FLOAT_TRUEDIV:      FLOAT,
-        rop.VEC_FLOAT_ABS:          FLOAT_UNARY,
-        rop.VEC_FLOAT_NEG:          FLOAT_UNARY,
-
-        rop.VEC_RAW_LOAD_I:         LOAD,
-        rop.VEC_RAW_LOAD_F:         LOAD,
-        rop.VEC_GETARRAYITEM_RAW_I: LOAD,
-        rop.VEC_GETARRAYITEM_RAW_F: LOAD,
-        rop.VEC_GETARRAYITEM_GC_I:  LOAD,
-        rop.VEC_GETARRAYITEM_GC_F:  LOAD,
-
-        rop.VEC_RAW_STORE:          STORE,
-        rop.VEC_SETARRAYITEM_RAW:   STORE,
-        rop.VEC_SETARRAYITEM_GC:    STORE,
-
-        rop.GUARD_TRUE: GUARD,
-        rop.GUARD_FALSE: GUARD,
-
-        # irregular
-        rop.VEC_INT_SIGNEXT: SignExtToVectorOp((TR_ANY_INTEGER,), None),
-
-        rop.VEC_CAST_FLOAT_TO_SINGLEFLOAT: OpToVectorOpConv(TR_DOUBLE_2, None), #RESTRICT_2_FLOAT),
-        rop.VEC_CAST_SINGLEFLOAT_TO_FLOAT: OpToVectorOpConv(TR_FLOAT_2, None), #RESTRICT_2_DOUBLE),
-        rop.VEC_CAST_FLOAT_TO_INT: OpToVectorOpConv(TR_DOUBLE_2, None), #RESTRICT_2_INT),
-        rop.VEC_CAST_INT_TO_FLOAT: OpToVectorOpConv(TR_INT_2, None), #RESTRICT_2_DOUBLE),
-
-        rop.VEC_FLOAT_EQ:    OpToVectorOp((TR_ANY_FLOAT,TR_ANY_FLOAT), None),
-        rop.VEC_FLOAT_NE:    OpToVectorOp((TR_ANY_FLOAT,TR_ANY_FLOAT), None),
-        rop.VEC_INT_IS_TRUE: OpToVectorOp((TR_ANY_INTEGER,TR_ANY_INTEGER), None), # TR_ANY_INTEGER),
-    }
-
-    # TODO?
-    UNSIGNED_OPS = (rop.UINT_FLOORDIV, rop.UINT_RSHIFT,
-                    rop.UINT_LT, rop.UINT_LE,
-                    rop.UINT_GT, rop.UINT_GE)
-
-def determine_input_output_types(pack, node, forward):
-    """ This function is two fold. If moving forward, it
-        gets an input type from the packs output type and returns
-        the transformed packtype.
-
-        Moving backward, the origins pack input type is the output
-        type and the transformation of the packtype (in reverse direction)
-        is the input
-    """
-    op = node.getoperation()
-    op2vecop = determine_trans(op)
-    if forward:
-        input_type = op2vecop.force_input(pack.output_type)
-        output_type = op2vecop.get_output_type_given(input_type, op)
-        if output_type:
-            output_type = output_type.clone()
-    else:
-        # going backwards, things are not that easy anymore
-        output_type = pack.input_type
-        input_type = op2vecop.get_input_type_given(output_type, op)
-        if input_type:
-            input_type = input_type.clone()
-
-    return input_type, output_type
+#def determine_input_output_types(pack, node, forward):
+#    """ This function is two fold. If moving forward, it
+#        gets an input type from the packs output type and returns
+#        the transformed packtype.
+#
+#        Moving backward, the origins pack input type is the output
+#        type and the transformation of the packtype (in reverse direction)
+#        is the input
+#    """
+#    op = node.getoperation()
+#    op2vecop = determine_trans(op)
+#    if forward:
+#        input_type = op2vecop.force_input(pack.output_type)
+#        output_type = op2vecop.get_output_type_given(input_type, op)
+#        if output_type:
+#            output_type = output_type.clone()
+#    else:
+#        # going backwards, things are not that easy anymore
+#        output_type = pack.input_type
+#        input_type = op2vecop.get_input_type_given(output_type, op)
+#        if input_type:
+#            input_type = input_type.clone()
+#
+#    return input_type, output_type
 
 def determine_trans(op):
     op2vecop = trans.MAPPING.get(op.vector, None)
@@ -951,8 +959,8 @@
             assert node.pack.numops() > 1
             for node in node.pack.operations:
                 scheduler.mark_emitted(node, self)
-            op2vecop = determine_trans(node.pack.leftmost())
-            op2vecop.as_vector_operation(self, node.pack)
+            # TODO op2vecop = determine_trans(node.pack.leftmost())
+            turn_to_vector(self, node.pack)
             return True
         return False
 
@@ -1021,39 +1029,22 @@
     def getvector_of_box(self, arg):
         return self.box_to_vbox.get(arg, (-1, None))
 
-    def setvector_of_box(self, box, off, vector):
-        assert off < vector.getcount()
-        assert box.type != 'V'
-        self.box_to_vbox[box] = (off, vector)
+    def setvector_of_box(self, var, off, vector):
+        assert off < vector.count
+        assert not var.is_vector()
+        self.box_to_vbox[var] = (off, vector)
 
 def opcount_filling_vector_register(pack, vec_reg_size):
     """ how many operations of that kind can one execute
         with a machine instruction of register size X?
     """
-    pack_type = pack.input_type
-    if pack_type is None:
-        pack_type = pack.output_type # load operations
-
     op = pack.leftmost()
-    if op.casts_box():
-        count = pack_type.getcount()
-        return count
-    count = vec_reg_size // pack_type.getsize()
-    return count
-
-def maximum_byte_size(pack, vec_reg_size):
-    """ The maxmum size in bytes the operation is able to
-        process with the hardware register and the operation
-        semantics.
-    """
-    op = pack.leftmost()
-    if op.casts_box():
-        # casting is special, often only takes a half full vector
-        pack_type = pack.input_type
-        if pack_type is None:
-            pack_type = pack.output_type # load operations
-        return pack_type.byte_size()
-    return vec_reg_size
+    if op.is_typecast():
+        if op.casts_down():
+            return vec_reg_size // op.cast_from_bytesize()
+        else:
+            return vec_reg_size // op.cast_to_bytesize()
+    return  vec_reg_size // op.bytesize
 
 class Pack(object):
     """ A pack is a set of n statements that are:
@@ -1080,6 +1071,9 @@
     def leftmost(self):
         return self.operations[0].getoperation()
 
+    def rightmost(self):
+        return self.operations[-1].getoperation()
+
     def pack_type(self):
         ptype = self.input_type
         if self.input_type is None:
@@ -1113,14 +1107,15 @@
             return 0
         if self.numops() == 0:
             return -1
-        size = maximum_byte_size(self, vec_reg_size)
-        return left.bytesize * self.numops() - size
-        #if self.input_type is None:
-            # e.g. load operations
-        #    return self.output_type.bytecount(self) - size
-        # default only consider the input type
-        # e.g. store operations, int_add, ...
-        #return self.input_type.bytecount(self) - size
+        if left.is_typecast():
+            # casting is special, often only takes a half full vector
+            if left.casts_down():
+                # size is reduced
+                return left.cast_from_bytesize() * self.numops() - vec_reg_size
+            else:
+                # size is increased
+                return left.cast_to_bytesize() * self.numops() - vec_reg_size
+        return left.bytesize * self.numops() - vec_reg_size
 
     def is_full(self, vec_reg_size):
         """ If one input element times the opcount is equal
@@ -1190,6 +1185,17 @@
                 accum = False
         return rightmost is leftmost and accum
 
+    def argument_vectors(self, state, pack, index):
+        args = [node.getoperation().getarg(index) for node in pack.operations]
+        vectors = []
+        last = None
+        for arg in args:
+            pos, vecop = state.getvector_of_box(arg)
+            if vecop is not last and vecop is not None:
+                vectors.append((pos, vecop))
+                last = vecop
+        return vectors
+
     def __repr__(self):
         if len(self.operations) == 0:
             return "Pack(empty)"
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_dependency.py b/rpython/jit/metainterp/optimizeopt/test/test_dependency.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_dependency.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_dependency.py
@@ -50,7 +50,7 @@
         else:
             label = loop.operations[0]
             label.setdescr(TargetToken(token))
-        loop = VectorLoop(label, loop.operations[1:-1], loop.operations[-1])
+        loop = VectorLoop(label, loop.operations[0:-1], loop.operations[-1])
         loop.jump.setdescr(token)
         for op in loop.operations:
             if op.getopnum() == rop.GUARD_EARLY_EXIT and op.getdescr() is None:
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_schedule.py b/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
@@ -67,8 +67,8 @@
         loop.graph = FakeDependencyGraph(loop)
         return loop
 
-    def pack(self, loop, l, r, input_type, output_type):
-        return Pack(loop.graph.nodes[1+l:1+r])
+    def pack(self, loop, l, r, input_type=None, output_type=None):
+        return Pack(loop.graph.nodes[l:r])
 
     def schedule(self, loop, packs, vec_reg_size=16,
                  prepend_invariant=False, overwrite_funcs=None):
@@ -115,6 +115,21 @@
             assert node.count == 1
         # must return here, then the test passed
 
+    def test_split_pack(self):
+        loop1 = self.parse_trace("""
+        f10 = raw_load_f(p0, i0, descr=double)
+        f11 = raw_load_f(p0, i1, descr=double)
+        f12 = raw_load_f(p0, i2, descr=double)
+        """)
+        ps = PackSet(16)
+        ps.packs = [self.pack(loop1, 0, 3)]
+        op1 = ps.packs[0].operations[0]
+        op2 = ps.packs[0].operations[1]
+        ps.split_overloaded_packs()
+        assert len(ps.packs) == 1
+        assert ps.packs[0].leftmost() is op1.getoperation()
+        assert ps.packs[0].rightmost() is op2.getoperation()
+
     def test_schedule_split_load(self):
         loop1 = self.parse_trace("""
         f10 = raw_load_f(p0, i0, descr=float)
@@ -124,10 +139,10 @@
         f14 = raw_load_f(p0, i4, descr=float)
         f15 = raw_load_f(p0, i5, descr=float)
         """)
-        pack1 = self.pack(loop1, 0, 6, None, F32)
+        pack1 = self.pack(loop1, 0, 6)
         loop2 = self.schedule(loop1, [pack1])
         loop3 = self.parse_trace("""
-        v10[4xi32] = vec_raw_load_i(p0, i0, descr=float)
+        v10[4xi32] = vec_raw_load_f(p0, i0, descr=float)
         f10 = raw_load_f(p0, i4, descr=float)
         f11 = raw_load_f(p0, i5, descr=float)
         """, False)
@@ -135,21 +150,21 @@
 
     def test_int_to_float(self):
         loop1 = self.parse_trace("""
-        i10 = raw_load(p0, i0, descr=long)
-        i11 = raw_load(p0, i1, descr=long)
+        i10 = raw_load_i(p0, i0, descr=long)
+        i11 = raw_load_i(p0, i1, descr=long)
         i12 = int_signext(i10, 4)
         i13 = int_signext(i11, 4)
         f10 = cast_int_to_float(i12)
         f11 = cast_int_to_float(i13)
         """)
-        pack1 = self.pack(loop1, 0, 2, None, I64)
-        pack2 = self.pack(loop1, 2, 4, I64, I32_2)
-        pack3 = self.pack(loop1, 4, 6, I32_2, F32_2)
+        pack1 = self.pack(loop1, 0, 2)
+        pack2 = self.pack(loop1, 2, 4)
+        pack3 = self.pack(loop1, 4, 6)
         loop2 = self.schedule(loop1, [pack1, pack2, pack3])
         loop3 = self.parse_trace("""
-        v10[i64|2] = vec_raw_load_i(p0, i0, descr=long)
-        v20[i32|2] = vec_int_signext(v10[i64|2], 4)
-        v30[f64|2] = vec_cast_int_to_float(v20[i32|2])
+        v10[2xi64] = vec_raw_load_i(p0, i0, descr=long)
+        v20[2xi32] = vec_int_signext(v10[2xi64], 4)
+        v30[2xf64] = vec_cast_int_to_float(v20[2xi32])
         """, False)
         self.assert_equal(loop2, loop3)
 
@@ -161,12 +176,12 @@
         pack1 = self.pack(loop1, 0, 2, I64, I64)
         loop2 = self.schedule(loop1, [pack1], prepend_invariant=True)
         loop3 = self.parse_trace("""
-        v10[i64|2] = vec_box(2)
-        v20[i64|2] = vec_int_pack(v10[i64|2], i0, 0, 1)
-        v30[i64|2] = vec_int_pack(v20[i64|2], i1, 1, 1)
-        v40[i64|2] = vec_int_expand(73,2)
+        v10[2xi64] = vec_box_i()
+        v20[2xi64] = vec_int_pack(v10[2xi64], i0, 0, 1)
+        v30[2xi64] = vec_int_pack(v20[2xi64], i1, 1, 1)
+        v40[2xi64] = vec_int_expand(73,2)
         #
-        v50[i64|2] = vec_int_add(v30[i64|2], v40[i64|2])
+        v50[2xi64] = vec_int_add(v30[2xi64], v40[2xi64])
         """, False)
         self.assert_equal(loop2, loop3)
 
@@ -177,12 +192,12 @@
         pack1 = self.pack(loop1, 0, 2, F64, F64)
         loop2 = self.schedule(loop1, [pack1], prepend_invariant=True)
         loop3 = self.parse_trace("""
-        v10[f64|2] = vec_box(2)
-        v20[f64|2] = vec_float_pack(v10[f64|2], f0, 0, 1)
-        v30[f64|2] = vec_float_pack(v20[f64|2], f1, 1, 1)
-        v40[f64|2] = vec_float_expand(73.0,2)
+        v10[2xf64] = vec_box_f()
+        v20[2xf64] = vec_float_pack(v10[2xf64], f0, 0, 1)
+        v30[2xf64] = vec_float_pack(v20[2xf64], f1, 1, 1)
+        v40[2xf64] = vec_float_expand(73.0,2)
         #
-        v50[f64|2] = vec_float_add(v30[f64|2], v40[f64|2])
+        v50[2xf64] = vec_float_add(v30[2xf64], v40[2xf64])
         """, False)
         self.assert_equal(loop2, loop3)
 
@@ -197,13 +212,13 @@
         pack2 = self.pack(loop1, 2, 4, F64, F64)
         loop2 = self.schedule(loop1, [pack1, pack2], prepend_invariant=True)
         loop3 = self.parse_trace("""
-        v10[f64|2] = vec_box(2)
-        v20[f64|2] = vec_float_pack(v10[f64|2], f0, 0, 1)
-        v30[f64|2] = vec_float_pack(v20[f64|2], f1, 1, 1)
-        v40[f64|2] = vec_float_expand(f5,2) # only expaned once
+        v10[2xf64] = vec_box_f()
+        v20[2xf64] = vec_float_pack(v10[2xf64], f0, 0, 1)
+        v30[2xf64] = vec_float_pack(v20[2xf64], f1, 1, 1)
+        v40[2xf64] = vec_float_expand(f5,2) # only expaned once
         #
-        v50[f64|2] = vec_float_add(v30[f64|2], v40[f64|2])
-        v60[f64|2] = vec_float_add(v50[f64|2], v40[f64|2])
+        v50[2xf64] = vec_float_add(v30[2xf64], v40[2xf64])
+        v60[2xf64] = vec_float_add(v50[2xf64], v40[2xf64])
         """, False)
         self.assert_equal(loop2, loop3)
 
@@ -217,7 +232,7 @@
         loop1 = self.parse_trace("""
         i10 = int_signext(i1, 4)
         i11 = int_signext(i1, 4)
-        """, additional_args=['v10[i64|2]'])
+        """, additional_args=['v10[2xi64]'])
         pack1 = self.pack(loop1, 0, 2, I64, I32_2)
         var = self.find_input_arg('v10', loop1)
         def i1inv103204(v):
@@ -227,20 +242,20 @@
                                 'getvector_of_box': i1inv103204,
                               })
         loop3 = self.parse_trace("""
-        v11[i32|2] = vec_int_signext(v10[i64|2], 4)
-        """, False, additional_args=['v10[i64|2]'])
+        v11[2xi32] = vec_int_signext(v10[2xi64], 4)
+        """, False, additional_args=['v10[2xi64]'])
         self.assert_equal(loop2, loop3)
 
     def test_cast_float_to_int(self):
         loop1 = self.parse_trace("""
-        f10 = raw_load(p0, i1, descr=double)
-        f11 = raw_load(p0, i2, descr=double)
-        f12 = raw_load(p0, i3, descr=double)
-        f13 = raw_load(p0, i4, descr=double)
-        f14 = raw_load(p0, i5, descr=double)
-        f15 = raw_load(p0, i6, descr=double)
-        f16 = raw_load(p0, i7, descr=double)
-        f17 = raw_load(p0, i8, descr=double)
+        f10 = raw_load_f(p0, i1, descr=double)
+        f11 = raw_load_f(p0, i2, descr=double)
+        f12 = raw_load_f(p0, i3, descr=double)
+        f13 = raw_load_f(p0, i4, descr=double)
+        f14 = raw_load_f(p0, i5, descr=double)
+        f15 = raw_load_f(p0, i6, descr=double)
+        f16 = raw_load_f(p0, i7, descr=double)
+        f17 = raw_load_f(p0, i8, descr=double)
         #
         i10 = cast_float_to_int(f10)
         i11 = cast_float_to_int(f11)
@@ -281,31 +296,31 @@
                                   '_prevent_signext': void
                               })
         loop3 = self.parse_trace("""
-        v10[f64|2] = vec_raw_load_f(p0, i1, descr=double)
-        v11[f64|2] = vec_raw_load_f(p0, i3, descr=double)
-        v12[f64|2] = vec_raw_load_f(p0, i5, descr=double)
-        v13[f64|2] = vec_raw_load_f(p0, i7, descr=double)
-        v14[i32|2] = vec_cast_float_to_int(v10[f64|2])
-        v15[i32|2] = vec_cast_float_to_int(v11[f64|2])
-        v16[i32|2] = vec_cast_float_to_int(v12[f64|2])
-        v17[i32|2] = vec_cast_float_to_int(v13[f64|2])
-        v18[i16|2] = vec_int_signext(v14[i32|2],2)
-        v19[i16|2] = vec_int_signext(v15[i32|2],2)
-        v20[i16|2] = vec_int_signext(v16[i32|2],2)
-        v21[i16|2] = vec_int_signext(v17[i32|2],2)
-        v22[i16|4] = vec_int_pack(v18[i16|2], v19[i16|2], 2, 2)
-        v23[i16|6] = vec_int_pack(v22[i16|4], v20[i16|2], 4, 2)
-        v24[i16|8] = vec_int_pack(v23[i16|6], v21[i16|2], 6, 2)
-        vec_raw_store(p1, i1, v24[i16|8], descr=short)
+        v10[2xf64] = vec_raw_load_f(p0, i1, descr=double)
+        v11[2xf64] = vec_raw_load_f(p0, i3, descr=double)
+        v12[2xf64] = vec_raw_load_f(p0, i5, descr=double)
+        v13[2xf64] = vec_raw_load_f(p0, i7, descr=double)
+        v14[2xi32] = vec_cast_float_to_int(v10[2xf64])
+        v15[2xi32] = vec_cast_float_to_int(v11[2xf64])
+        v16[2xi32] = vec_cast_float_to_int(v12[2xf64])
+        v17[2xi32] = vec_cast_float_to_int(v13[2xf64])
+        v18[2xi16] = vec_int_signext(v14[2xi32],2)
+        v19[2xi16] = vec_int_signext(v15[2xi32],2)
+        v20[2xi16] = vec_int_signext(v16[2xi32],2)
+        v21[2xi16] = vec_int_signext(v17[2xi32],2)
+        v22[4xi16] = vec_int_pack(v18[2xi16], v19[2xi16], 2, 2)
+        v23[6xi16] = vec_int_pack(v22[4xi16], v20[2xi16], 4, 2)
+        v24[8xi16] = vec_int_pack(v23[6xi16], v21[2xi16], 6, 2)
+        vec_raw_store(p1, i1, v24[8xi16], descr=short)
         """, False)
         self.assert_equal(loop2, loop3)
 
     def test_cast_float_to_single_float(self):
         loop1 = self.parse_trace("""
-        f10 = raw_load(p0, i1, descr=double)
-        f11 = raw_load(p0, i2, descr=double)
-        f12 = raw_load(p0, i3, descr=double)
-        f13 = raw_load(p0, i4, descr=double)
+        f10 = raw_load_f(p0, i1, descr=double)
+        f11 = raw_load_f(p0, i2, descr=double)
+        f12 = raw_load_f(p0, i3, descr=double)
+        f13 = raw_load_f(p0, i4, descr=double)
         #
         i10 = cast_float_to_singlefloat(f10)
         i11 = cast_float_to_singlefloat(f11)
@@ -322,19 +337,19 @@
         pack3 = self.pack(loop1, 8, 12, I32, None)
         loop2 = self.schedule(loop1, [pack1,pack2,pack3])
         loop3 = self.parse_trace("""
-        v44[f64|2] = vec_raw_load_f(p0, i1, descr=double) 
-        v45[f64|2] = vec_raw_load_f(p0, i3, descr=double) 
-        v46[i32|2] = vec_cast_float_to_singlefloat(v44[f64|2]) 
-        v47[i32|2] = vec_cast_float_to_singlefloat(v45[f64|2]) 
-        v41[i32|4] = vec_int_pack(v46[i32|2], v47[i32|2], 2, 2) 
-        vec_raw_store(p1, i1, v41[i32|4], descr=float)
+        v44[2xf64] = vec_raw_load_f(p0, i1, descr=double) 
+        v45[2xf64] = vec_raw_load_f(p0, i3, descr=double) 
+        v46[2xi32] = vec_cast_float_to_singlefloat(v44[2xf64]) 
+        v47[2xi32] = vec_cast_float_to_singlefloat(v45[2xf64]) 
+        v41[4xi32] = vec_int_pack(v46[2xi32], v47[2xi32], 2, 2) 
+        vec_raw_store(p1, i1, v41[4xi32], descr=float)
         """, False)
         self.assert_equal(loop2, loop3)
 
     def test_all(self):
         loop1 = self.parse_trace("""
-        i10 = raw_load(p0, i1, descr=long)
-        i11 = raw_load(p0, i2, descr=long)
+        i10 = raw_load_i(p0, i1, descr=long)
+        i11 = raw_load_i(p0, i2, descr=long)
         #
         i12 = int_and(i10, 255)
         i13 = int_and(i11, 255)
@@ -347,20 +362,20 @@
         pack3 = self.pack(loop1, 4, 6, I64, None)
         loop2 = self.schedule(loop1, [pack1,pack2,pack3], prepend_invariant=True)
         loop3 = self.parse_trace("""
-        v9[i64|2] = vec_int_expand(255,2)
-        v10[i64|2] = vec_raw_load_i(p0, i1, descr=long)
-        v11[i64|2] = vec_int_and(v10[i64|2], v9[i64|2])
-        guard_true(v11[i64|2]) []
+        v9[2xi64] = vec_int_expand(255,2)
+        v10[2xi64] = vec_raw_load_i(p0, i1, descr=long)
+        v11[2xi64] = vec_int_and(v10[2xi64], v9[2xi64])
+        guard_true(v11[2xi64]) []
         """, False)
         self.assert_equal(loop2, loop3)
 
 
     def test_split_load_store(self):
         loop1 = self.parse_trace("""
-        i10 = raw_load(p0, i1, descr=float)
-        i11 = raw_load(p0, i2, descr=float)
-        i12 = raw_load(p0, i3, descr=float)
-        i13 = raw_load(p0, i4, descr=float)
+        i10 = raw_load_f(p0, i1, descr=float)
+        i11 = raw_load_f(p0, i2, descr=float)
+        i12 = raw_load_f(p0, i3, descr=float)
+        i13 = raw_load_f(p0, i4, descr=float)
         raw_store(p0, i3, i10, descr=float)
         raw_store(p0, i4, i11, descr=float)
         """)
@@ -368,10 +383,10 @@
         pack2 = self.pack(loop1, 4, 6, I32_2, None)
         loop2 = self.schedule(loop1, [pack1,pack2], prepend_invariant=True)
         loop3 = self.parse_trace("""
-        v1[i32|4] = vec_raw_load_i(p0, i1, descr=float)
-        i10 = vec_int_unpack(v1[i32|4], 0, 1)
+        v1[4xi32] = vec_raw_load_i(p0, i1, descr=float)
+        i10 = vec_int_unpack(v1[4xi32], 0, 1)
         raw_store(p0, i3, i10, descr=float)
-        i11 = vec_int_unpack(v1[i32|4], 1, 1)
+        i11 = vec_int_unpack(v1[4xi32], 1, 1)
         raw_store(p0, i4, i11, descr=float)
         """, False)
         # unfortunate ui32 is the type for float32... the unsigned u is for
@@ -386,9 +401,9 @@
         pack1 = self.pack(loop1, 0, 2, I64, I64)
         loop2 = self.schedule(loop1, [pack1], prepend_invariant=True)
         loop3 = self.parse_trace("""
-        v1[i64|2] = vec_int_expand(255,2)
-        v2[i64|2] = vec_int_expand(i1,2)
-        v3[i64|2] = vec_int_and(v1[i64|2], v2[i64|2])
+        v1[2xi64] = vec_int_expand(255,2)
+        v2[2xi64] = vec_int_expand(i1,2)
+        v3[2xi64] = vec_int_and(v1[2xi64], v2[2xi64])
         """, False)
         self.assert_equal(loop2, loop3)
 
@@ -400,9 +415,9 @@
         pack1 = self.pack(loop1, 0, 2, I64, I64)
         loop2 = self.schedule(loop1, [pack1], prepend_invariant=True)
         loop3 = self.parse_trace("""
-        v1[i64|2] = vec_int_expand(255, 2)
-        v2[i64|2] = vec_int_expand(i1, 2)
-        v3[i64|2] = vec_int_and(v1[i64|2], v2[i64|2])
+        v1[2xi64] = vec_int_expand(255, 2)
+        v2[2xi64] = vec_int_expand(i1, 2)
+        v3[2xi64] = vec_int_and(v1[2xi64], v2[2xi64])
         """, False)
         self.assert_equal(loop2, loop3)
 
@@ -419,19 +434,19 @@
         pack4 = self.pack(loop1, 4, 6, I64, I64)
         loop2 = self.schedule(loop1, [pack1,pack4], prepend_invariant=True)
         loop3 = self.parse_trace("""
-        v1[i64|2] = vec_int_expand(255,2)
-        v2[i64|2] = vec_box(2)
-        v3[i64|2] = vec_int_pack(v2[i64|2], i1, 0, 1)
-        v4[i64|2] = vec_int_pack(v3[i64|2], i2, 1, 1)
-        v5[i64|2] = vec_int_and(v1[i64|2], v4[i64|2])
-        i10 = vec_int_unpack(v5[i64|2], 0, 1)
+        v1[2xi64] = vec_int_expand(255,2)
+        v2[2xi64] = vec_box_i()
+        v3[2xi64] = vec_int_pack(v2[2xi64], i1, 0, 1)
+        v4[2xi64] = vec_int_pack(v3[2xi64], i2, 1, 1)
+        v5[2xi64] = vec_int_and(v1[2xi64], v4[2xi64])
+        i10 = vec_int_unpack(v5[2xi64], 0, 1)
         i12 = uint_floordiv(i10,1)
-        i11 = vec_int_unpack(v5[i64|2], 1, 1)
+        i11 = vec_int_unpack(v5[2xi64], 1, 1)
         i13 = uint_floordiv(i11,1)
-        v6[i64|2] = vec_box(2)
-        v7[i64|2] = vec_int_pack(v6[i64|2], i12, 0, 1)
-        v8[i64|2] = vec_int_pack(v7[i64|2], i13, 1, 1)
-        v9[i64|2] = vec_int_and(v4[i64|2], v8[i64|2])
+        v6[2xi64] = vec_box_i()
+        v7[2xi64] = vec_int_pack(v6[2xi64], i12, 0, 1)
+        v8[2xi64] = vec_int_pack(v7[2xi64], i13, 1, 1)
+        v9[2xi64] = vec_int_and(v4[2xi64], v8[i64])
         """, False)
         self.assert_equal(loop2, loop3)
 
diff --git a/rpython/jit/metainterp/optimizeopt/vector.py b/rpython/jit/metainterp/optimizeopt/vector.py
--- a/rpython/jit/metainterp/optimizeopt/vector.py
+++ b/rpython/jit/metainterp/optimizeopt/vector.py
@@ -614,13 +614,9 @@
         self.savings += benefit_factor * times - cost
 
     def cb_signext(self, pack):
-        op0 = pack.operations[0].getoperation()
-        size = op0.getarg(1).getint()
-        if pack.output_type is None:
-            return 1,0
-        orig_size = pack.output_type.getsize()
-        if size == orig_size:
-            return 0,0
+        left = pack.leftmost()
+        if left.cast_to_bytesize() == left.cast_from_bytesize():
+            return 0, 0
         # no benefit for this operation! needs many x86 instrs
         return 1,0
 
@@ -836,6 +832,8 @@
                 pack.split(newpacks, self.vec_reg_size)
                 continue
             if load < Pack.FULL:
+                for op in pack.operations:
+                    op.priority = -100
                 pack.clear()
                 self.packs[i] = None
                 continue
diff --git a/rpython/jit/metainterp/resoperation.py b/rpython/jit/metainterp/resoperation.py
--- a/rpython/jit/metainterp/resoperation.py
+++ b/rpython/jit/metainterp/resoperation.py
@@ -99,7 +99,7 @@
     _attrs_ = ('datatype', 'bytesize', 'signed')
 
     datatype = '\x00'
-    bytesize = -1
+    bytesize = -1 # -1 means the biggest size known to the machine
     signed = True
 
     def inittype(self):
@@ -112,10 +112,17 @@
             descr = self.getdescr()
             type = self.type
             if descr.is_array_of_floats() or descr.concrete_type == 'f':
-                type = FLOAT
+                type = 'f'
             self.bytesize = descr.get_item_size_in_bytes()
             self.sign = descr.is_item_signed()
             self.datatype = type
+        elif self.opnum == rop.INT_SIGNEXT:
+            arg0 = self.getarg(0)
+            arg1 = self.getarg(1)
+            self.setdatatype('i', arg1.value, arg0.signed)
+        elif self.is_typecast():
+            ft,tt = self.cast_types()
+            self.setdatatype(tt, self.cast_to_bytesize(), tt == 'i')
         else:
             # pass through the type of the first input argument
             if self.numargs() == 0:
@@ -123,7 +130,7 @@
             arg0 = self.getarg(0)
             self.setdatatype(arg0.datatype, arg0.bytesize, arg0.signed)
         assert self.datatype != '\x00'
-        assert self.bytesize > 0
+        #assert self.bytesize > 0
 
     def setdatatype(self, data_type, bytesize, signed):
         self.datatype = data_type
@@ -134,7 +141,7 @@
         sign = '-'
         if not self.signed:
             sign = '+'
-        return 'Type(%s%s, %d)' % (sign, self.type, self.size)
+        return 'Type(%s%s, %d)' % (sign, self.type, self.bytesize)
 
 class AbstractResOpOrInputArg(AbstractValue, Typed):
     _attrs_ = ('_forwarded',)
@@ -159,6 +166,7 @@
     boolinverse = -1
     vector = -1 # -1 means, no vector equivalent, -2 it is a vector statement
     casts = ('\x00', -1, '\x00', -1)
+    count = -1 
 
     def getopnum(self):
         return self.opnum
@@ -409,15 +417,6 @@
     def forget_value(self):
         pass
 
-    def casts_box(self):
-        return False
-
-    def cast_to(self):
-        return ('\x00',-1)
-
-    def cast_from(self):
-        return ('\x00',-1)
-
     def is_label(self):
         return self.getopnum() == rop.LABEL
 
@@ -430,6 +429,26 @@
     def returns_vector(self):
         return self.type != 'v' and self.vector == -2
 
+    def is_typecast(self):
+        return False
+
+    def cast_types(self):
+        return self.casts[0], self.casts[2]
+
+    def cast_to_bytesize(self):
+        return self.casts[3] # casts = (from type, from size, to type, to size)
+
+    def cast_from_bytesize(self):
+        return self.casts[1] # byte size of the value that is being converted
+
+    def casts_up(self):
+        return self.cast_to_bytesize() > self.cast_from_bytesize()
+
+    def casts_down(self):
+        # includes the cast as noop
+        return self.cast_to_bytesize() <= self.cast_from_bytesize()
+
+
 # ===================
 # Top of the hierachy
 # ===================
@@ -598,7 +617,7 @@
 class CastOp(object):
     _mixin_ = True
 
-    def casts_box(self):
+    def is_typecast(self):
         return True
 
     def cast_to(self):
@@ -614,15 +633,40 @@
         return (to_type,size)
 
     def cast_from(self):
-        return ('\x00',-1)
+        type, size, a, b = self.casts
+        if size == -1: # -1 is the unset default in `casts`: use own bytesize
+            return (type, self.bytesize) # keep the (type, size) tuple shape
+        return (type, size)
+
+class SignExtOp(object):
+    _mixin_ = True
+
+    def is_typecast(self):
+        return True
+
+    def cast_types(self):
+        return self.casts[0], self.casts[2]
+
+    def cast_to_bytesize(self):
+        from rpython.jit.metainterp.history import ConstInt
+        arg = self.getarg(1)
+        assert isinstance(arg, ConstInt)
+        return arg.value
+
+    def cast_from_bytesize(self):
+        arg = self.getarg(0)
+        return arg.bytesize
 
 class VectorOp(object):
     _mixin_ = True
-    _attrs_ = ('count',)
 
     def repr_rpython(self):
         return repr_rpython(self, 'bv')
 
+    def vector_bytesize(self):
+        assert self.count > 0 # a positive item count must have been assigned
+        return self.bytesize * self.count # attribute is `bytesize`, see Typed
+
     def same_shape(self, other):
         """ NOT_RPYTHON """
         if not other.is_vector():
@@ -675,10 +719,12 @@
 class InputArgInt(IntOp, AbstractInputArg):
     def __init__(self, intval=0):
         self.setint(intval)
+        self.datatype = 'i'
 
 class InputArgFloat(FloatOp, AbstractInputArg):
     def __init__(self, f=longlong.ZEROF):
         self.setfloatstorage(f)
+        self.datatype = 'f'
 
     @staticmethod
     def fromfloat(x):
@@ -687,13 +733,14 @@
 class InputArgRef(RefOp, AbstractInputArg):
     def __init__(self, r=lltype.nullptr(llmemory.GCREF.TO)):
         self.setref_base(r)
+        self.datatype = 'r'
 
     def reset_value(self):
         self.setref_base(lltype.nullptr(llmemory.GCREF.TO))
 
 class InputArgVector(VectorOp, AbstractInputArg):
-    def __init__(self):
-        pass
+    def __init__(self, datatype):
+        self.datatype = datatype
 
     def returns_vector(self):
         return True
@@ -947,11 +994,10 @@
     'VEC_CAST_INT_TO_FLOAT/1/f',
     '_VEC_CAST_LAST',
 
-    'VEC_INT_BOX/1/i',
+    'VEC_BOX/0/if',
     'VEC_INT_UNPACK/3/i',          # iX|fX = VEC_INT_UNPACK(vX, index, item_count)
     'VEC_INT_PACK/4/i',            # VEC_INT_PACK(vX, var/const, index, item_count)
     'VEC_INT_EXPAND/2/i',          # vX = VEC_INT_EXPAND(var/const, item_count)
-    'VEC_FLOAT_BOX/1/f',
     'VEC_FLOAT_UNPACK/3/f',        # iX|fX = VEC_FLOAT_UNPACK(vX, index, item_count)
     'VEC_FLOAT_PACK/4/f',          # VEC_FLOAT_PACK(vX, var/const, index, item_count)
     'VEC_FLOAT_EXPAND/2/f',        # vX = VEC_FLOAT_EXPAND(var/const, item_count)
@@ -1090,13 +1136,13 @@
 ]
 
 _cast_ops = {
-    'INT_SIGNEXT': ('i', 0, 'i', 0),
     'CAST_FLOAT_TO_INT': ('f', 8, 'i', 4),
     'CAST_INT_TO_FLOAT': ('i', 4, 'f', 8),
     'CAST_FLOAT_TO_SINGLEFLOAT': ('f', 8, 'f', 4),
     'CAST_SINGLEFLOAT_TO_FLOAT': ('f', 4, 'f', 8),
-    'CAST_PTR_TO_INT': ('r', 0, 'i', 4),
-    'CAST_INT_TO_PTR': ('i', 4, 'r', 0),
+    'INT_SIGNEXT': ('i', 0, 'i', 0),
+    #'CAST_PTR_TO_INT': ('r', 0, 'i', 4),
+    #'CAST_INT_TO_PTR': ('i', 4, 'r', 0),
 }
 
 # ____________________________________________________________
@@ -1187,6 +1233,8 @@
     else:
         baseclass = PlainResOp
     mixins = [arity2mixin.get(arity, N_aryOp)]
+    if name.startswith('VEC'):
+        mixins.append(VectorOp)
     if result_type == 'i':
         mixins.append(IntOp)
     elif result_type == 'f':
@@ -1196,9 +1244,9 @@
     else:
         assert result_type == 'n'
     if name in _cast_ops:
+        if name == "INT_SIGNEXT":
+            mixins.append(SignExtOp)
         mixins.append(CastOp)
-    if name.startswith('VEC'):
-        mixins.insert(1,VectorOp)
 
     cls_name = '%s_OP' % name
     bases = (get_base_class(tuple(mixins), baseclass),)
diff --git a/rpython/jit/tool/oparser.py b/rpython/jit/tool/oparser.py
--- a/rpython/jit/tool/oparser.py
+++ b/rpython/jit/tool/oparser.py
@@ -344,9 +344,19 @@
         if res in self.vars:
             raise ParseError("Double assign to var %s in line: %s" % (res, line))
         resop = self.create_op(opnum, args, res, descr, fail_args)
+        self.update_vector_count(resop, res)
         self.vars[res] = resop
         return resop
 
+    def update_vector_count(self, resop, var):
+        pattern = re.compile('.*\[(\d+)x(u?)(i|f)(\d+)\]')
+        match = pattern.match(var)
+        if match:
+            resop.count = int(match.group(1))
+            resop.signed = not (match.group(2) == 'u')
+            resop.datatype = match.group(3)
+            resop.bytesize = int(match.group(4)) // 8
+
     def parse_op_no_result(self, line):
         opnum, args, descr, fail_args = self.parse_op(line)
         res = self.create_op(opnum, args, None, descr, fail_args)
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

[pypy-commit] pypy vecopt-merge: further poking the scheduler. resoperations are now fully typed. this makes all the transformation logic much easier and less code, first simple tests pass already

Reply via email to