Author: Richard Plangger <r...@pasra.at>
Branch: vecopt
Changeset: r78728:948a227eab7f
Date: 2015-07-31 16:01 +0200
http://bitbucket.org/pypy/pypy/changeset/948a227eab7f/

Log:    that was a nasty problem. entering the vecopt trace through the
        preamble only worked for non accum/expanded traces; otherwise the
        arguments would not match. the loop now has an original label,
        followed by the invariant operations, leading to a new label
        that can carry the expanded values

diff --git a/pypy/module/micronumpy/loop.py b/pypy/module/micronumpy/loop.py
--- a/pypy/module/micronumpy/loop.py
+++ b/pypy/module/micronumpy/loop.py
@@ -85,7 +85,7 @@
 call_many_to_one_driver = jit.JitDriver(
     name='numpy_call_many_to_one',
     greens=['shapelen', 'nin', 'func', 'res_dtype'],
-    reds='auto', vectorize=True)
+    reds='auto')
 
 def call_many_to_one(space, shape, func, res_dtype, in_args, out):
     # out must hav been built. func needs no calc_type, is usually an
@@ -119,7 +119,7 @@
 call_many_to_many_driver = jit.JitDriver(
     name='numpy_call_many_to_many',
     greens=['shapelen', 'nin', 'nout', 'func', 'res_dtype'],
-    reds='auto', vectorize=True)
+    reds='auto')
 
 def call_many_to_many(space, shape, func, res_dtype, in_args, out_args):
     # out must hav been built. func needs no calc_type, is usually an
@@ -228,7 +228,7 @@
 reduce_cum_driver = jit.JitDriver(
     name='numpy_reduce_cum_driver',
     greens=['shapelen', 'func', 'dtype', 'out_dtype'],
-    reds='auto', vectorize=True)
+    reds='auto')
 
 def compute_reduce_cumulative(space, obj, out, calc_dtype, func, identity):
     obj_iter, obj_state = obj.create_iter()
@@ -356,7 +356,7 @@
 def _new_argmin_argmax(op_name):
     arg_driver = jit.JitDriver(name='numpy_' + op_name,
                                greens = ['shapelen', 'dtype'],
-                               reds = 'auto', vectorize=True)
+                               reds = 'auto')
 
     def argmin_argmax(arr):
         result = 0
@@ -536,7 +536,7 @@
 
 flatiter_getitem_driver = jit.JitDriver(name = 'numpy_flatiter_getitem',
                                         greens = ['dtype'],
-                                        reds = 'auto')
+                                        reds = 'auto', vectorize=True)
 
 def flatiter_getitem(res, base_iter, base_state, step):
     ri, rs = res.create_iter()
@@ -570,7 +570,7 @@
 
 fromstring_driver = jit.JitDriver(name = 'numpy_fromstring',
                                   greens = ['itemsize', 'dtype'],
-                                  reds = 'auto', vectorize=True)
+                                  reds = 'auto')
 
 def fromstring_loop(space, a, dtype, itemsize, s):
     i = 0
@@ -604,7 +604,7 @@
 getitem_int_driver = jit.JitDriver(name = 'numpy_getitem_int',
                                    greens = ['shapelen', 'indexlen',
                                              'prefixlen', 'dtype'],
-                                   reds = 'auto', vectorize=True)
+                                   reds = 'auto')
 
 def getitem_array_int(space, arr, res, iter_shape, indexes_w, prefix_w):
     shapelen = len(iter_shape)
@@ -632,7 +632,7 @@
 setitem_int_driver = jit.JitDriver(name = 'numpy_setitem_int',
                                    greens = ['shapelen', 'indexlen',
                                              'prefixlen', 'dtype'],
-                                   reds = 'auto', vectorize=True)
+                                   reds = 'auto')
 
 def setitem_array_int(space, arr, iter_shape, indexes_w, val_arr,
                       prefix_w):
@@ -762,7 +762,7 @@
 
 diagonal_simple_driver = jit.JitDriver(name='numpy_diagonal_simple_driver',
                                        greens = ['axis1', 'axis2'],
-                                       reds = 'auto', vectorize=True)
+                                       reds = 'auto')
 
 def diagonal_simple(space, arr, out, offset, axis1, axis2, size):
     out_iter, out_state = out.create_iter()
@@ -806,7 +806,7 @@
 def _new_binsearch(side, op_name):
     binsearch_driver = jit.JitDriver(name='numpy_binsearch_' + side,
                                      greens=['dtype'],
-                                     reds='auto', vectorize=True)
+                                     reds='auto')
 
     def binsearch(space, arr, key, ret):
         assert len(arr.get_shape()) == 1
diff --git a/pypy/module/micronumpy/test/test_zjit.py b/pypy/module/micronumpy/test/test_zjit.py
--- a/pypy/module/micronumpy/test/test_zjit.py
+++ b/pypy/module/micronumpy/test/test_zjit.py
@@ -466,8 +466,6 @@
     def test_cumsum(self):
         result = self.run("cumsum")
         assert result == 15
-        # not vectorizable, has one back edge
-        self.check_vectorized(1, 0)
 
     def define_axissum():
         return """
@@ -803,7 +801,7 @@
     def test_flat_getitem(self):
         result = self.run("flat_getitem")
         assert result == 10.0
-        self.check_vectorized(0,0)
+        self.check_vectorized(1,1)
 
     def define_flat_setitem():
         return '''
diff --git a/rpython/jit/metainterp/compile.py b/rpython/jit/metainterp/compile.py
--- a/rpython/jit/metainterp/compile.py
+++ b/rpython/jit/metainterp/compile.py
@@ -149,23 +149,14 @@
                          [inliner.inline_op(h_ops[i]) for i in range(start, len(h_ops))] + \
                          [ResOperation(rop.JUMP, [inliner.inline_arg(a) for a in jumpargs],
                                         None, descr=jitcell_token)]
-        target_token = part.operations[0].getdescr()
-        assert isinstance(target_token, TargetToken)
-        all_target_tokens.append(target_token)
-        inputargs = jumpargs
-        jumpargs = part.operations[-1].getarglist()
-
         try:
             optimize_trace(metainterp_sd, jitdriver_sd, part, warmstate,
                            start_state=start_state, export_state=False,
                            try_disabling_unroll=try_disabling_unroll)
         except InvalidLoop:
             return None
-
-        loop.operations = loop.operations[:-1] + part.operations
-        loop.versions = part.versions
-        if part.quasi_immutable_deps:
-            loop.quasi_immutable_deps.update(part.quasi_immutable_deps)
+        #
+        loop.append_loop(part, all_target_tokens)
     assert part.operations[-1].getopnum() != rop.LABEL
 
     if loop.versions is not None:
@@ -197,7 +188,6 @@
     metainterp_sd = metainterp.staticdata
     cpu = metainterp_sd.cpu
     if loop.versions is not None:
-        token = jitcell_token
         for version in loop.versions:
             if len(version.faildescrs) == 0:
                 continue
diff --git a/rpython/jit/metainterp/history.py b/rpython/jit/metainterp/history.py
--- a/rpython/jit/metainterp/history.py
+++ b/rpython/jit/metainterp/history.py
@@ -761,7 +761,10 @@
 
     def register_all_guards(self, opt_ops, invariant_arg_count=0):
         from rpython.jit.metainterp.compile import CompileLoopVersionDescr
+        pass_by = 0
         idx = index_of_first(rop.LABEL, opt_ops)
+        if opt_ops[idx].getdescr() is not opt_ops[-1].getdescr():
+            idx = index_of_first(rop.LABEL, opt_ops, pass_by=1)
         assert idx >= 0
         version_failargs = opt_ops[idx].getarglist()
         if invariant_arg_count > 0:
@@ -799,6 +802,7 @@
         op.rd_snapshot = None
 
     def update_token(self, jitcell_token):
+        # this is only invoked for versioned loops!
         label = self.operations[self.label_pos]
         jump = self.operations[-1]
         #
@@ -849,6 +853,29 @@
             insns[opname] = insns.get(opname, 0) + 1
         return insns
 
+    def append_loop(self, loop, all_target_tokens):
+        # append e.g. the peeled loop to this loop!
+        label, jump = loop.operations[0], loop.operations[-1]
+        assert label.getopnum() == rop.LABEL
+        assert jump.getopnum() == rop.JUMP
+        target_token = None
+        i = 0
+        # collect the target token of every label, stopping once the token
+        # that the trailing jump targets has been added
+        while target_token is not jump.getdescr():
+            # there is another label
+            op = loop.operations[i]
+            if op.getopnum() == rop.LABEL:
+                target_token = op.getdescr()
+                assert isinstance(target_token, TargetToken)
+                all_target_tokens.append(target_token)
+            i += 1
+        #
+        self.operations = self.operations[:-1] + loop.operations
+        self.versions = loop.versions
+        if loop.quasi_immutable_deps:
+            self.quasi_immutable_deps.update(loop.quasi_immutable_deps)
+
     def get_operations(self):
         return self.operations
 
diff --git a/rpython/jit/metainterp/optimizeopt/guard.py b/rpython/jit/metainterp/optimizeopt/guard.py
--- a/rpython/jit/metainterp/optimizeopt/guard.py
+++ b/rpython/jit/metainterp/optimizeopt/guard.py
@@ -119,8 +119,8 @@
         descr = myop.getdescr()
         descr.copy_all_attributes_from(other.op.getdescr())
         myop.rd_frame_info_list = otherop.rd_frame_info_list
+        myop.setfailargs(otherop.getfailargs())
         myop.rd_snapshot = otherop.rd_snapshot
-        myop.setfailargs(otherop.getfailargs())
 
     def emit_varops(self, opt, var, old_arg):
         assert isinstance(var, IndexVar)
diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py b/rpython/jit/metainterp/optimizeopt/schedule.py
--- a/rpython/jit/metainterp/optimizeopt/schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/schedule.py
@@ -1,6 +1,6 @@
 
 from rpython.jit.metainterp.history import (VECTOR,FLOAT,INT,ConstInt,BoxVector,
-        BoxFloat,BoxInt,ConstFloat)
+        BoxFloat,BoxInt,ConstFloat,TargetToken)
 from rpython.jit.metainterp.resoperation import (rop, ResOperation, GuardResOp)
 from rpython.jit.metainterp.optimizeopt.dependency import (DependencyGraph,
         MemoryRef, Node, IndexVar)
@@ -839,24 +839,32 @@
         assert off < vector.getcount()
         self.box_to_vbox[box] = (off, vector)
 
-    def prepend_invariant_operations(self, oplist):
+    def prepend_invariant_operations(self, oplist, orig_label_args):
         if len(self.invariant_oplist) > 0:
             label = oplist[0]
             assert label.getopnum() == rop.LABEL
+            #
             jump = oplist[-1]
             assert jump.getopnum() == rop.JUMP
-
-            label_args = label.getarglist()
+            #
+            label_args = label.getarglist()[:]
             jump_args = jump.getarglist()
             for var in self.invariant_vector_vars:
                 label_args.append(var)
                 jump_args.append(var)
-
-            oplist[0] = label.copy_and_change(label.getopnum(), label_args, None, label.getdescr())
-            oplist[-1] = jump.copy_and_change(jump.getopnum(), jump_args, None, jump.getdescr())
-
-            return self.invariant_oplist + oplist
-
+            #
+            # in case of any invariant_vector_vars, the label is restored
+            # and the invariant operations are added between the original label
+            # and the new label
+            descr = label.getdescr()
+            assert isinstance(descr, TargetToken)
+            token = TargetToken(descr.targeting_jitcell_token)
+            oplist[0] = label.copy_and_change(label.getopnum(), label_args, None, token)
+            oplist[-1] = jump.copy_and_change(jump.getopnum(), jump_args, None, token)
+            #
+            return [ResOperation(rop.LABEL, orig_label_args, None, descr)] + \
+                   self.invariant_oplist + oplist
+        #
         return oplist
 
 class Pack(object):
diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py b/rpython/jit/metainterp/optimizeopt/vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/vectorize.py
@@ -107,10 +107,12 @@
         self.cpu = metainterp_sd.cpu
         self.costmodel = X86_CostModel(cost_threshold, self.cpu.vector_register_size)
         self.appended_arg_count = 0
+        self.orig_label_args = None
 
     def propagate_all_forward(self, clear=True):
         self.clear_newoperations()
         label = self.loop.operations[0]
+        self.orig_label_args = label.getarglist()[:]
         jump = self.loop.operations[-1]
         if jump.getopnum() not in (rop.LABEL, rop.JUMP) or \
            label.getopnum() != rop.LABEL:
@@ -463,7 +465,8 @@
                     if accum:
                         accum.save_to_descr(op.getdescr(),i)
         self.loop.operations = \
-            sched_data.prepend_invariant_operations(self._newoperations)
+            sched_data.prepend_invariant_operations(self._newoperations,
+                                                    self.orig_label_args)
         self.clear_newoperations()
 
     def unpack_from_vector(self, op, sched_data, renamer):
@@ -577,7 +580,7 @@
         #
         tgt_op.setdescr(descr)
         tgt_op.rd_snapshot = op.rd_snapshot
-        tgt_op.setfailargs(op.getfailargs())
+        tgt_op.setfailargs(op.getfailargs()[:])
 
 
 class CostModel(object):
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to