Author: Richard Plangger <planri...@gmail.com>
Branch: vecopt-merge
Changeset: r79737:664117c201a8
Date: 2015-09-21 11:40 +0200
http://bitbucket.org/pypy/pypy/changeset/664117c201a8/

Log:    vecopt.py tests passing again, now let's finally head to the
        assembler

diff --git a/rpython/jit/metainterp/optimizeopt/guard.py b/rpython/jit/metainterp/optimizeopt/guard.py
--- a/rpython/jit/metainterp/optimizeopt/guard.py
+++ b/rpython/jit/metainterp/optimizeopt/guard.py
@@ -91,7 +91,7 @@
         guard.setdescr(descr.clone())
         guard.setarg(0, box_result)
         label = loop.find_first(rop.LABEL)
-        guard.setfailargs(label.getarglist())
+        guard.setfailargs(label.getarglist()[:])
         opt.emit_operation(guard)
 
         return guard
@@ -120,7 +120,7 @@
         descr = myop.getdescr()
         descr.copy_all_attributes_from(other.op.getdescr())
         myop.rd_frame_info_list = otherop.rd_frame_info_list
-        myop.setfailargs(otherop.getfailargs())
+        myop.setfailargs(otherop.getfailargs()[:])
         myop.rd_snapshot = otherop.rd_snapshot
 
     def emit_varops(self, opt, var, old_arg):
@@ -140,6 +140,7 @@
         opt.emit_operation(cmp_op)
         # emit that actual guard
         guard = ResOperation(self.op.getopnum(), [cmp_op], self.op.getdescr())
+        guard.setfailargs(self.op.getfailargs()[:])
         opt.emit_operation(guard)
         self.setindex(opt.operation_position()-1)
         self.setoperation(guard)
@@ -173,6 +174,7 @@
         self.strength_reduced = 0 # how many guards could be removed?
         self.strongest_guards = {}
         self.guards = {}
+        self.delayed = {}
 
     def collect_guard_information(self, loop):
         operations = loop.operations
@@ -271,8 +273,30 @@
 
     def emit_operation(self, op):
         self.renamer.rename(op)
+        #if op.is_always_pure():
+        #    self.delay(op)
+        #    return
+        #self.emit_delayed_for(op)
+        #if not op.is_always_pure():
         self._newoperations.append(op)
 
+    def delay(self, op):
+        self.delayed[op] = None
+        print "delayed", op
+
+    def emit_delayed_for(self, op):
+        if op.is_inputarg():
+            return
+        additional = []
+        if op.is_guard():
+            additional = op.getfailargs()
+        for arg in op.getarglist() + additional:
+            if arg in self.delayed:
+                del self.delayed[arg]
+                self.emit_delayed_for(arg)
+                self._newoperations.append(op)
+
+
     def operation_position(self):
         return len(self._newoperations)
 
diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py b/rpython/jit/metainterp/optimizeopt/schedule.py
--- a/rpython/jit/metainterp/optimizeopt/schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/schedule.py
@@ -5,6 +5,7 @@
 from rpython.jit.metainterp.optimizeopt.dependency import (DependencyGraph,
         MemoryRef, Node, IndexVar)
 from rpython.jit.metainterp.optimizeopt.renamer import Renamer
+from rpython.jit.metainterp.resume import AccumInfo
 from rpython.rlib.objectmodel import we_are_translated
 from rpython.jit.metainterp.jitexc import NotAProfitableLoop
 from rpython.rlib.objectmodel import specialize, always_inline
@@ -23,14 +24,16 @@
     def post_schedule(self):
         loop = self.graph.loop
         self.renamer.rename(loop.jump)
+        self.ensure_args_unpacked(loop.jump)
         loop.operations = self.oplist
         loop.prefix = self.invariant_oplist
-        if len(self.invariant_vector_vars) > 0:
-            # TODO, accum?
+        if len(self.invariant_vector_vars) + len(self.invariant_oplist) > 0:
             args = loop.label.getarglist_copy() + self.invariant_vector_vars
             opnum = loop.label.getopnum()
             # TODO descr?
-            loop.prefix_label = loop.label.copy_and_change(opnum, args)
+            op = loop.label.copy_and_change(opnum, args)
+            self.renamer.rename(op)
+            loop.prefix_label = op
 
     def profitable(self):
         return True
@@ -172,25 +175,22 @@
     def any_size(self):
         return self.bytesize == TypeRestrict.ANY_SIZE
 
+    @always_inline
+    def any_count(self):
+        return self.count == TypeRestrict.ANY_COUNT
+
     def check(self, value):
         assert value.datatype != '\x00'
         if self.type != TypeRestrict.ANY_TYPE:
-            if self.type != value.datatype:
-                assert 0, "type mismatch"
-
+            assert self.type == value.datatype
         assert value.bytesize > 0
         if not self.any_size():
-            if self.bytesize != value.bytesize:
-                assert 0, "size mismatch"
-
+            assert self.bytesize == value.bytesize
         assert value.count > 0
         if self.count != TypeRestrict.ANY_COUNT:
-            if self.count != value.count:
-                assert 0, "count mismatch"
-
+            assert value.count >= self.count
         if self.sign != TypeRestrict.ANY_SIGN:
-            if bool(self.sign) != value.sign:
-                assert 0, "sign mismatch"
+            assert bool(self.sign) == value.sign
 
     def max_input_count(self, count):
         """ How many """
@@ -205,8 +205,7 @@
     TR_ANY_INTEGER = TypeRestrict(INT)
     TR_FLOAT_2 = TypeRestrict(FLOAT, 4, 2)
     TR_DOUBLE_2 = TypeRestrict(FLOAT, 8, 2)
-    TR_LONG = TypeRestrict(INT, 8, 2)
-    TR_INT_2 = TypeRestrict(INT, 4, 2)
+    TR_INT32_2 = TypeRestrict(INT, 4, 2)
 
     # note that the following definition is x86 arch specific
     MAPPING = {
@@ -237,9 +236,10 @@
         rop.VEC_INT_SIGNEXT:        [TR_ANY_INTEGER],
 
         rop.VEC_CAST_FLOAT_TO_SINGLEFLOAT:  [TR_DOUBLE_2],
-        rop.VEC_CAST_SINGLEFLOAT_TO_FLOAT:  [TR_FLOAT_2],
+        # weird but the trace will store single floats in int boxes
+        rop.VEC_CAST_SINGLEFLOAT_TO_FLOAT:  [TR_INT32_2],
         rop.VEC_CAST_FLOAT_TO_INT:          [TR_DOUBLE_2],
-        rop.VEC_CAST_INT_TO_FLOAT:          [TR_INT_2],
+        rop.VEC_CAST_INT_TO_FLOAT:          [TR_INT32_2],
 
         rop.VEC_FLOAT_EQ:           [TR_ANY_FLOAT,TR_ANY_FLOAT],
         rop.VEC_FLOAT_NE:           [TR_ANY_FLOAT,TR_ANY_FLOAT],
@@ -264,11 +264,6 @@
         assert isinstance(vecop, GuardResOp)
         vecop.setfailargs(op.getfailargs())
         vecop.rd_snapshot = op.rd_snapshot
-    if pack.is_accumulating():
-        for i,node in enumerate(pack.operations):
-            op = node.getoperation()
-            state.accumulation[op] = pack
-
 
 def prepare_arguments(state, pack, args):
     # Transforming one argument to a vector box argument
@@ -344,6 +339,12 @@
 
 @always_inline
 def position_values(state, restrict, pack, args, index, position):
+    arg = args[index]
+    newcount, count = restrict.count, arg.count
+    if not restrict.any_count() and newcount != count:
+        if position == 0:
+            pass
+        pass
     if position != 0:
         # The vector box is at a position != 0 but it
         # is required to be at position 0. Unpack it!
@@ -527,18 +528,17 @@
             #self.appendedvar_pos_arg_count = len(sched_data.invariant_vector_vars)
             failargs = op.getfailargs()
             descr = op.getdescr()
+            # note: stitching a guard must resemble the order of the label
+            # otherwise a wrong mapping is handed to the register allocator
             for i,arg in enumerate(failargs):
                 if arg is None:
                     continue
                 accum = self.accumulation.get(arg, None)
                 if accum:
                     assert isinstance(accum, AccumPack)
-                    accum.attach_accum_info(descr.rd_accum_list, i)
-
-    def post_schedule(self):
-        loop = self.graph.loop
-        self.ensure_args_unpacked(loop.jump)
-        SchedulerState.post_schedule(self)
+                    accum.attach_accum_info(descr, i, arg)
+                    seed = accum.getseed()
+                    failargs[i] = self.renamer.rename_map.get(seed, seed)
 
     def profitable(self):
         return self.costmodel.profitable()
@@ -602,6 +602,8 @@
         if var:
             if var in self.invariant_vector_vars:
                 return arg
+            if arg in self.accumulation:
+                return var
             args = [var, ConstInt(pos), ConstInt(1)]
             vecop = OpHelpers.create_vec_unpack(var.type, args, var.bytesize,
                                                 var.signed, 1)
@@ -757,12 +759,12 @@
             vector register.
         """
         before_count = len(packlist)
-        #print "splitting pack", self
+        print "splitting pack", self
         pack = self
         while pack.pack_load(vec_reg_size) > Pack.FULL:
             pack.clear()
             oplist, newoplist = pack.slice_operations(vec_reg_size)
-            #print "  split of %dx, left: %d" % (len(oplist), len(newoplist))
+            print "  split of %dx, left: %d" % (len(oplist), len(newoplist))
             pack.operations = oplist
             pack.update_pack_of_nodes()
             if not pack.leftmost().is_typecast():
@@ -778,7 +780,7 @@
                 newpack.clear()
                 newpack.operations = []
                 break
-        #print "  => %dx packs out of %d operations" % (-before_count + len(packlist) + 1, sum([pack.numops() for pack in packlist[before_count:]]))
+        print "  => %dx packs out of %d operations" % (-before_count + len(packlist) + 1, sum([pack.numops() for pack in packlist[before_count:]]))
         pack.update_pack_of_nodes()
 
     def slice_operations(self, vec_reg_size):
@@ -864,9 +866,8 @@
         return 0
 
     def attach_accum_info(self, descr, position, scalar):
-        descr.rd_accum_list = AccumInfo(descr.rd_accum_list,
-                                        position, self.operator,
-                                        self.scalar, None)
+        descr.rd_accum_list = AccumInfo(descr.rd_accum_list, position, self.operator,
+                                        scalar, None)
 
     def is_accumulating(self):
         return True
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_vecopt.py b/rpython/jit/metainterp/optimizeopt/test/test_vecopt.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_vecopt.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_vecopt.py
@@ -1085,7 +1085,7 @@
         f2 = float_add(f0, f1)
         i1 = int_add(i0, 8)
         i2 = int_lt(i1, 100)
-        guard_false(i2) [p0, i0, f2]
+        guard_true(i2) [p0, i0, f2]
         jump(p0, i1, f2)
         """
         trace_opt = """
@@ -1094,9 +1094,11 @@
         v7[2xf64] = vec_int_xor(v6[0xf64], v6[0xf64])
         v2[2xf64] = vec_pack_f(v7[2xf64], f0, 0, 1)
         label(p0, i0, v2[2xf64])
+        i100 = int_add(i0, 8)
+        i200 = int_lt(i100, 100)
         i1 = int_add(i0, 16)
         i2 = int_lt(i1, 100)
-        guard_false(i2) [p0, i0, v2[2xf64]]
+        guard_true(i2) [p0, i0, v2[2xf64]]
         i10 = int_add(i0, 16)
         i20 = int_lt(i10, 100)
         v1[2xf64] = vec_raw_load_f(p0, i0, descr=floatarraydescr)
@@ -1108,7 +1110,7 @@
         self.assert_equal(loop, self.parse_loop(trace_opt))
 
     def test_element_f45_in_guard_failargs(self):
-        ops = """
+        trace = self.parse_loop("""
         [p36, i28, p9, i37, p14, f34, p12, p38, f35, p39, i40, i41, p42, i43, i44, i21, i4, i0, i18]
         f45 = raw_load_f(i21, i44, descr=floatarraydescr) 
         guard_not_invalidated() [p38, p12, p9, p14, f45, p39, i37, i44, f35, i40, p42, i43, None, i28, p36, i41]
@@ -1122,33 +1124,33 @@
         i52 = int_ge(i50, i18) 
         guard_false(i52) [p38, p12, p9, p14, i48, i46, f47, i51, i50, f45, p39, None, None, None, i40, p42, i43, None, None, p36, None]
         jump(p36, i50, p9, i51, p14, f45, p12, p38, f47, p39, i40, i48, p42, i43, i46, i21, i4, i0, i18)
-        """
-        opt = """
+        """)
+        trace_opt = self.parse_loop("""
         [p36, i28, p9, i37, p14, f34, p12, p38, f35, p39, i40, i41, p42, i43, i44, i21, i4, i0, i18]
-        guard_not_invalidated() [p38, p12, p9, p14, p39, i37, i44, f35, i40, p42, i43, f34, i28, p36, i41]
+        guard_not_invalidated() [p36, i28, p9, i37, p14, f34, p12, p38, f35, p39, i40, i41, p42, i43, i44, i21, i4, i0, i18]
         i50 = int_add(i28, 1) 
-        i48 = int_add(i41, 8) 
-        i51 = int_add(i37, 8) 
-        i54 = int_add(i41, 16) 
-        i46 = int_add(i44, 8) 
-        i56 = int_add(i37, 16) 
-        i52 = int_ge(i50, i18) 
-        i637 = int_add(i28, 2)
-        i638 = int_ge(i637, i18)
+        i20 = int_ge(i50, i18)
+        i54 = int_add(i28, 2) 
+        i638 = int_ge(i54, i18)
         guard_false(i638) [p36, i28, p9, i37, p14, f34, p12, p38, f35, p39, i40, i41, p42, i43, i44, i21, i4, i0, i18]
+        i12 = int_add(i44, 8)
+        i56 = int_add(i41, 8) 
+        i46 = int_add(i37, 8) 
+        i47 = int_add(i28, 2) 
+        i52 = int_ge(i47, i18) 
         i55 = int_add(i44, 16) 
-        i629 = int_add(i28, 2)
-        i57 = int_ge(i637, i18) 
-        v61 = vec_raw_load_f(i21, i44, 2, descr=floatarraydescr) 
-        v62 = vec_raw_load_f(i4, i41, 2, descr=floatarraydescr) 
-        v63 = vec_float_add(v61, v62) 
+        i629 = int_add(i41, 16)
+        i637 = int_add(i37, 16)
+        v61[2xf64] = vec_raw_load_f(i21, i44, descr=floatarraydescr) 
+        v62[2xf64] = vec_raw_load_f(i4, i41, descr=floatarraydescr) 
+        v63[2xf64] = vec_float_add(v61, v62) 
         vec_raw_store(i0, i37, v63, descr=floatarraydescr) 
-        f100 = vec_float_unpack(v61, 1, 1)
-        f101 = vec_float_unpack(v62, 1, 1)
+        f100 = vec_unpack_f(v61, 1, 1)
+        f101 = vec_unpack_f(v62, 1, 1)
         jump(p36, i637, p9, i56, p14, f100, p12, p38, f101, p39, i40, i54, p42, i43, i55, i21, i4, i0, i18)
-        """
-        vopt = self.vectorize(self.parse_loop(ops))
-        self.assert_equal(vopt.loop, self.parse_loop(opt))
+        """)
+        vopt = self.vectorize(trace)
+        self.assert_equal(trace, trace_opt)
 
     def test_shrink_vector_size(self):
         ops = """
@@ -1187,7 +1189,7 @@
         self.assert_equal(loop, self.parse_loop(opt))
 
     def test_castup_arith_castdown(self):
-        ops = """
+        trace = self.parse_loop("""
         [p0,p1,p2,i0,i4]
         i10 = raw_load_i(p0, i0, descr=float32arraydescr)
         i1 = int_add(i0, 4)
@@ -1201,76 +1203,57 @@
         i186 = int_lt(i5, 100) 
         guard_true(i186) []
         jump(p0,p1,p2,i1,i5)
-        """
-        opt = """
+        """)
+        trace_opt = self.parse_loop("""
         [p0, p1, p2, i0, i4]
         i5 = int_add(i4, 4)
-        i1 = int_add(i0, 4)
         i186 = int_lt(i5, 100)
         i500 = int_add(i4, 16)
         i501 = int_lt(i500, 100)
         guard_true(i501) [p0, p1, p2, i0, i4]
-        i189 = int_add(i0, 8)
+        i189 = int_add(i0, 4)
         i187 = int_add(i4, 8)
-        i198 = int_add(i0, 12)
         i188 = int_lt(i187, 100)
-        i207 = int_add(i0, 16)
+        i207 = int_add(i0, 8)
         i196 = int_add(i4, 12)
         i197 = int_lt(i196, 100)
-        i205 = int_add(i4, 16)
-        i206 = int_lt(i205, 100)
-        v228 = vec_raw_load_i(p0, i0, 4, descr=float32arraydescr)
-        v229 = vec_cast_singlefloat_to_float(v228)
-        v230 = vec_int_unpack(v228, 2, 2)
+        i205 = int_add(i0, 12)
+        i400 = int_add(i4, 16)
+        i401= int_lt(i400, 100)
+        i402 = int_add(i0, 16)
+        v228[4xi32] = vec_raw_load_i(p0, i0, descr=float32arraydescr)
+        v229[2xf64] = vec_cast_singlefloat_to_float(v228)
+        v230 = vec_unpack_i(v228, 2, 2)
         v231 = vec_cast_singlefloat_to_float(v230)
-        v232 = vec_raw_load_i(p1, i1, 4, descr=float32arraydescr)
+        v232 = vec_raw_load_i(p1, i189, descr=float32arraydescr)
         v233 = vec_cast_singlefloat_to_float(v232)
-        v234 = vec_int_unpack(v232, 2, 2)
+        v236 = vec_float_add(v229, v233)
+        v238 = vec_cast_float_to_singlefloat(v236)
+        v234 = vec_unpack_i(v232, 2, 2)
         v235 = vec_cast_singlefloat_to_float(v234)
         v237 = vec_float_add(v231, v235)
         v239 = vec_cast_float_to_singlefloat(v237)
-        v236 = vec_float_add(v229, v233)
-        v238 = vec_cast_float_to_singlefloat(v236)
-        v240 = vec_pack_f(v238, v239, 2, 2)
+        v240 = vec_pack_i(v238, v239, 2, 2)
         vec_raw_store(p2, i4, v240, descr=float32arraydescr)
         jump(p0, p1, p2, i207, i500)
-        """
-        vopt = self.vectorize(self.parse_loop(ops))
-        self.assert_equal(vopt.loop, self.parse_loop(opt))
-
-    def test_truediv_abs_neg_float(self):
-        ops = """
-        [f9,p10,i11,p4,i12,p2,p5,p13,i14,p7,i15,p8,i16,f17,i18,i19]
-        f20 = raw_load(i16, i12, descr=floatarraydescr)
-        guard_not_invalidated() [p8, p7, p5, p4, p2, f20, None, i12, i11, p10, i15, i14, p13]
-        i23 = int_add(i12, 8)
-        f24 = float_truediv(f20, f17)
-        f25 = float_abs(f20)
-        f26 = float_neg(f20)
-        raw_store(i18, i15, f24, descr=floatarraydescr)
-        i26 = int_add(i14, 1)
-        i28 = int_add(i15, 8)
-        i29 = int_ge(i26, i19)
-        guard_false(i29) [p8, p7, p5, p4, p2, f20, i23, i28, None, p13]
-        jump(f20, p10, i11, p4, i23, p2, p5, p13, i26, p7, i28, p8, i16, f17, i18, i19)
-        """
-        opt = self.vectorize(self.parse_loop(ops))
-        self.debug_print_operations(opt.loop)
+        """)
+        vopt = self.vectorize(trace)
+        self.assert_equal(trace, trace_opt)
 
     def test_axis_sum(self):
         trace = """
         [i1, p10, i11, p8, i12, p3, p4, p13, i14, i15, p6, p9, i16, i17, i18, i19, i20, i21, i22, i23]
-        f24 = raw_load(i16, i12, descr=floatarraydescr)
+        f24 = raw_load_f(i16, i12, descr=floatarraydescr)
         guard_not_invalidated() [i1, p9, p8, p6, p4, p3, f24, i11, i15, p13, i12, i14, p10]
         i26 = int_add(i12, 8)
-        i27 = getarrayitem_gc(p13, i1, descr=floatarraydescr)
+        i27 = getarrayitem_gc_f(p13, i1, descr=floatarraydescr)
         i28 = int_is_zero(i27)
         guard_false(i28) [i1, p9, p8, p6, p4, p3, f24, i26, i11, i15, p13, None, i14, p10]
-        f30 = raw_load(i17, i15, descr=floatarraydescr)
+        f30 = raw_load_f(i17, i15, descr=floatarraydescr)
         f31 = float_add(f30, f24)
         raw_store(i18, i15, f31, descr=floatarraydescr)
         i33 = int_add(i14, 1)
-        i34 = getarrayitem_gc(p13, i19, descr=floatarraydescr)
+        i34 = getarrayitem_gc_f(p13, i19, descr=floatarraydescr)
         i35 = int_lt(i34, i20)
         guard_true(i35) [i1, p9, p8, p6, p4, p3, i21, i34, i15, i33, i19, p13, f31, None, i26, i11, None, None, None, i14, p10]
         i37 = int_add(i34, 1)
@@ -1287,7 +1270,8 @@
             pass
 
     def test_cast_1(self):
-        trace = """
+        # TODO
+        trace = self.parse_loop("""
         [i9, i10, p2, p11, i12, i13, p4, p5, p14, i15, p8, i16, p17, i18, i19, i20, i21, i22, i23]
         i24 = raw_load_i(i20, i16, descr=float32arraydescr)
         guard_not_invalidated() [p8, p5, p4, p2, i24, p17, i13, i12, i10, i19, p14, p11, i18, i15, i16, None]
@@ -1304,11 +1288,33 @@
         i39 = int_ge(i36, i23)
         guard_false(i39) [p8, p5, p4, p2, i27, i28, i30, i24, i38, i36, p17, None, None, None, None, p14, p11, i18, i15, None, None]
         jump(i24, i28, p2, p11, i36, i38, p4, p5, p14, i15, p8, i27, p17, i18, i30, i20, i21, i22, i23)
-        """
-        opt = self.vectorize(self.parse_loop(trace))
-        self.debug_print_operations(opt.loop)
+        """)
+        opt = self.vectorize(trace)
+        self.debug_print_operations(trace)
+
+    def test_truediv_abs_neg_float(self):
+        # TODO
+        trace = self.parse_loop("""
+        [f9,p10,i11,p4,i12,p2,p5,p13,i14,p7,i15,p8,i16,f17,i18,i19]
+        f20 = raw_load_f(i16, i12, descr=floatarraydescr)
+        guard_not_invalidated() [p8, p7, p5, p4, p2, f20, None, i12, i11, p10, i15, i14, p13]
+        i23 = int_add(i12, 8)
+        f24 = float_truediv(f20, f17)
+        f25 = float_abs(f20)
+        f26 = float_neg(f20)
+        raw_store(i18, i15, f24, descr=floatarraydescr)
+        i26 = int_add(i14, 1)
+        i28 = int_add(i15, 8)
+        i29 = int_ge(i26, i19)
+        guard_false(i29) [p8, p7, p5, p4, p2, f20, i23, i28, None, p13]
+        jump(f20, p10, i11, p4, i23, p2, p5, p13, i26, p7, i28, p8, i16, f17, i18, i19)
+        """)
+        opt = self.vectorize(trace)
+        self.debug_print_operations(trace)
+
 
     def test_all_guard(self):
+        # TODO
         trace = """
         [p0, p3, i4, i5, i6, i7]
         f8 = raw_load_f(i6, i5, descr=floatarraydescr)
@@ -1327,6 +1333,7 @@
         self.debug_print_operations(loop)
 
     def test_max(self):
+        # TODO
         trace = """
         [p3, i4, p2, i5, f6, i7, i8]
         f9 = raw_load_f(i7, i5, descr=floatarraydescr)
diff --git a/rpython/jit/metainterp/optimizeopt/util.py b/rpython/jit/metainterp/optimizeopt/util.py
--- a/rpython/jit/metainterp/optimizeopt/util.py
+++ b/rpython/jit/metainterp/optimizeopt/util.py
@@ -148,6 +148,7 @@
             x = op1.getarg(i)
             y = op2.getarg(i)
             assert x.same_box(remap.get(y, y))
+            assert x.same_shape(remap.get(y, y))
         if op2 in remap:
             assert op1.same_box(remap[op2])
         else:
diff --git a/rpython/jit/metainterp/optimizeopt/vector.py b/rpython/jit/metainterp/optimizeopt/vector.py
--- a/rpython/jit/metainterp/optimizeopt/vector.py
+++ b/rpython/jit/metainterp/optimizeopt/vector.py
@@ -36,7 +36,7 @@
 class VectorLoop(object):
     def __init__(self, label, oplist, jump):
         self.label = label
-        self.inputargs = label.getarglist()
+        self.inputargs = label.getarglist_copy()
         self.prefix = []
         self.prefix_label = None
         assert self.label.getopnum() == rop.LABEL
@@ -160,15 +160,6 @@
         self.has_two_labels = False
 
     def propagate_all_forward(self, info, loop):
-        #label = loop.label
-        #jump = loop.jump
-        #if jump.getopnum() not in (rop.LABEL, rop.JUMP) or \
-        #   label.getopnum() != rop.LABEL:
-        #    import pdb; pdb. set_trace()
-        #    raise NotAVectorizeableLoop()
-        #if jump.numargs() != label.numargs():
-        #    import pdb; pdb. set_trace()
-        #    raise NotAVectorizeableLoop()
         self.orig_label_args = loop.label.getarglist_copy()
         self.linear_find_smallest_type(loop)
         byte_count = self.smallest_type_bytes
@@ -207,29 +198,6 @@
     def unroll_loop_iterations(self, loop, unroll_count):
         """ Unroll the loop X times. unroll_count + 1 = unroll_factor """
         numops = len(loop.operations)
-        # use the target token of the label
-        #target_token = label_op.getdescr()
-        #if not we_are_translated():
-        #    target_token.assumed_classes = {}
-        #if jump_op.getopnum() == rop.LABEL:
-        #    jump_op = ResOperation(rop.JUMP, jump_op.getarglist(), target_token)
-        #else:
-        #    jump_op = jump_op.clone()
-        #    jump_op.setdescr(target_token)
-        #assert jump_op.is_final()
-
-        #self.emit_unrolled_operation(label_op)
-
-        #for i in range(0,numops):
-        #    op = loop.operations[i].copy()
-        #    if op.is_guard():
-        #        assert isinstance(op, GuardResOp)
-        #        failargs = renamer.rename_failargs(op, clone=True)
-        #        snapshot = renamer.rename_rd_snapshot(op.rd_snapshot, clone=True)
-        #        op.setfailargs(failargs)
-        #        op.rd_snapshot = snapshot
-        #    operations.append(op)
-        #    self.emit_unrolled_operation(op)
 
         renamer = Renamer()
         operations = loop.operations
@@ -560,16 +528,12 @@
         """ Marks this guard as an early exit! """
         op = node.getoperation()
         assert isinstance(op, GuardResOp)
-        descr = None
         if op.getopnum() in (rop.GUARD_TRUE, rop.GUARD_FALSE):
             descr = CompileLoopVersionDescr()
-        else:
-            descr = ResumeAtLoopHeaderDescr()
-        if op.getdescr():
-            descr.copy_all_attributes_from(op.getdescr())
-        #
-        op.setdescr(descr)
-        op.setfailargs(loop.inputargs)
+            if op.getdescr():
+                descr.copy_all_attributes_from(op.getdescr())
+            op.setdescr(descr)
+        op.setfailargs(loop.label.getarglist_copy())
 
 class CostModel(object):
     """ Utility to estimate the savings for the new trace loop.
@@ -789,6 +753,9 @@
         for pack in self.packs:
             if not pack.is_accumulating():
                 continue
+            for i,node in enumerate(pack.operations):
+                op = node.getoperation()
+                state.accumulation[op] = pack
             assert isinstance(pack, AccumPack)
             datatype = pack.getdatatype()
             bytesize = pack.getbytesize()
@@ -818,6 +785,7 @@
             state.setvector_of_box(pack.getseed(), 0, vecop) # prevent it from expansion
             state.renamer.start_renaming(pack.getseed(), vecop)
 
+
     def split_overloaded_packs(self):
         newpacks = []
         for i,pack in enumerate(self.packs):
diff --git a/rpython/jit/metainterp/optimizeopt/version.py b/rpython/jit/metainterp/optimizeopt/version.py
--- a/rpython/jit/metainterp/optimizeopt/version.py
+++ b/rpython/jit/metainterp/optimizeopt/version.py
@@ -28,9 +28,6 @@
         else:
             self.descrs.append(descr)
         self.leads_to[descr] = version
-        # note: stitching a guard must resemble the order of the label
-        # otherwise a wrong mapping is handed to the register allocator
-        op.setfailargs(version.renamed_inputargs)
         assert version.renamed_inputargs is not None
 
     def remove(self, descr):
diff --git a/rpython/jit/metainterp/optimizeopt/resoperation.py b/rpython/jit/metainterp/resoperation.py
--- a/rpython/jit/metainterp/resoperation.py
+++ b/rpython/jit/metainterp/resoperation.py
@@ -37,7 +37,7 @@
         return self is other
 
     def same_shape(self, other):
-        return self is other
+        return True
 
     def repr_short(self, memo):
         return self.repr(memo)
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to