Author: Richard Plangger <r...@pasra.at>
Branch: vecopt
Changeset: r77974:66758cffe3af
Date: 2015-06-09 09:34 +0200
http://bitbucket.org/pypy/pypy/changeset/66758cffe3af/

Log:    generating vector box for accumulation before the label and renaming
        occurrences

diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py 
b/rpython/jit/metainterp/optimizeopt/schedule.py
--- a/rpython/jit/metainterp/optimizeopt/schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/schedule.py
@@ -88,6 +88,49 @@
         node.clear_dependencies()
         node.emitted = True
 
+def vectorbox_outof_box(box, count=-1, size=-1, type='-', clone_signed=True, 
signed=False):
+    if box.type not in (FLOAT, INT):
+        raise AssertionError("cannot create vector box of type %s" % 
(box.type))
+    signed = True
+    if box.type == FLOAT:
+        signed = False
+    return BoxVector(box.type, 2, 8, signed)
+
+def vectorbox_clone_set(box, count=-1, size=-1, type='-', clone_signed=True, 
signed=False):
+    if count == -1:
+        count = box.item_count
+    if size == -1:
+        size = box.item_size
+    if type == '-':
+        type = box.item_type
+    if clone_signed:
+        signed = box.item_signed
+    return BoxVector(type, count, size, signed)
+
+def getpackopnum(type):
+    if type == INT:
+        return rop.VEC_INT_PACK
+    elif type == FLOAT:
+        return rop.VEC_FLOAT_PACK
+    #
+    raise AssertionError("getpackopnum type %s not supported" % (type,))
+
+def getunpackopnum(type):
+    if type == INT:
+        return rop.VEC_INT_UNPACK
+    elif type == FLOAT:
+        return rop.VEC_FLOAT_UNPACK
+    #
+    raise AssertionError("getunpackopnum type %s not supported" % (type,))
+
+def getexpandopnum(type):
+    if type == INT:
+        return rop.VEC_INT_EXPAND
+    elif type == FLOAT:
+        return rop.VEC_FLOAT_EXPAND
+    #
+    raise AssertionError("getexpandopnum type %s not supported" % (type,))
+
 class PackType(object):
     UNKNOWN_TYPE = '-'
 
@@ -163,9 +206,6 @@
         self.input_type = None
         self.output_type = None
 
-    def clone_vbox_set_count(self, box, count):
-        return BoxVector(box.item_type, count, box.item_size, box.item_signed)
-
     def is_vector_arg(self, i):
         if i < 0 or i >= len(self.arg_ptypes):
             return False
@@ -321,10 +361,8 @@
         return vbox_cloned
 
     def unpack(self, vbox, index, count, arg_ptype):
-        vbox_cloned = self.clone_vbox_set_count(vbox, count)
-        opnum = rop.VEC_FLOAT_UNPACK
-        if vbox.item_type == INT:
-            opnum = rop.VEC_INT_UNPACK
+        vbox_cloned = vectorbox_clone_set(vbox, count=count)
+        opnum = getunpackopnum(vbox.item_type)
         op = ResOperation(opnum, [vbox, ConstInt(index), ConstInt(count)], 
vbox_cloned)
         self.preamble_ops.append(op)
         return vbox_cloned
@@ -336,9 +374,7 @@
           this function creates a box pack instruction to merge them to:
           v1/2 = [A,B,X,Y]
         """
-        opnum = rop.VEC_FLOAT_PACK
-        if tgt_box.item_type == INT:
-            opnum = rop.VEC_INT_PACK
+        opnum = getpackopnum(tgt_box.item_type)
         arg_count = len(args)
         i = index
         while i < arg_count and tgt_box.item_count < packable:
@@ -348,7 +384,7 @@
                 i += 1
                 continue
             count = tgt_box.item_count + src_box.item_count
-            new_box = self.clone_vbox_set_count(tgt_box, count)
+            new_box = vectorbox_clone_set(tgt_box, count=count)
             op = ResOperation(opnum, [tgt_box, src_box, ConstInt(i),
                                       ConstInt(src_box.item_count)], new_box)
             self.preamble_ops.append(op)
@@ -404,9 +440,7 @@
                 break
             i += 1
         else:
-            expand_opnum = rop.VEC_FLOAT_EXPAND
-            if box_type == INT:
-                expand_opnum = rop.VEC_INT_EXPAND
+            expand_opnum = getexpandopnum(box_type)
             op = ResOperation(expand_opnum, [arg], vbox)
             invariant_ops.append(op)
             invariant_vars.append(vbox)
@@ -415,9 +449,7 @@
 
         op = ResOperation(rop.VEC_BOX, [ConstInt(len(nodes))], vbox)
         invariant_ops.append(op)
-        opnum = rop.VEC_FLOAT_PACK
-        if arg.type == INT:
-            opnum = rop.VEC_INT_PACK
+        opnum = getpackopnum(arg.type)
         for i,node in enumerate(nodes):
             op = node.getoperation()
             arg = op.getarg(argidx)
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py 
b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
@@ -68,7 +68,7 @@
         opt.analyse_index_calculations()
         if opt.dependency_graph is not None:
             self._write_dot_and_convert_to_svg(opt.dependency_graph, "ee" + 
self.test_name)
-            opt.schedule()
+            opt.schedule(False)
         opt.unroll_loop_iterations(loop, unroll_factor)
         opt.loop.operations = opt.get_newoperations()
         self.debug_print_operations(opt.loop)
@@ -101,7 +101,7 @@
         opt.find_adjacent_memory_refs()
         opt.extend_packset()
         opt.combine_packset()
-        opt.schedule()
+        opt.schedule(True)
         return opt
 
     def vectorize(self, loop, unroll_factor = -1):
@@ -109,7 +109,7 @@
         opt.find_adjacent_memory_refs()
         opt.extend_packset()
         opt.combine_packset()
-        opt.schedule()
+        opt.schedule(True)
         gso = GuardStrengthenOpt(opt.dependency_graph.index_vars)
         gso.propagate_all_forward(opt.loop)
         return opt
diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py 
b/rpython/jit/metainterp/optimizeopt/vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/vectorize.py
@@ -10,7 +10,8 @@
 from rpython.jit.metainterp.optimizeopt.util import make_dispatcher_method, 
Renamer
 from rpython.jit.metainterp.optimizeopt.dependency import (DependencyGraph,
         MemoryRef, Node, IndexVar)
-from rpython.jit.metainterp.optimizeopt.schedule import VecScheduleData, 
Scheduler, Pack, Pair, AccumPair
+from rpython.jit.metainterp.optimizeopt.schedule import (VecScheduleData,
+        Scheduler, Pack, Pair, AccumPair, vectorbox_outof_box, getpackopnum)
 from rpython.jit.metainterp.optimizeopt.guard import GuardStrengthenOpt
 from rpython.jit.metainterp.resoperation import (rop, ResOperation, GuardResOp)
 from rpython.rlib.objectmodel import we_are_translated
@@ -83,7 +84,6 @@
         self.smallest_type_bytes = 0
         self.early_exit_idx = -1
         self.sched_data = None
-        self.tried_to_pack = False
         self.costmodel = X86_CostModel(cost_threshold)
 
     def propagate_all_forward(self, clear=True):
@@ -107,7 +107,7 @@
         # find index guards and move to the earliest position
         self.analyse_index_calculations()
         if self.dependency_graph is not None:
-            self.schedule() # reorder the trace
+            self.schedule(False) # reorder the trace
 
         # unroll
         self.unroll_count = self.get_unroll_count(vsize)
@@ -122,7 +122,7 @@
         self.combine_packset()
         if not self.costmodel.profitable(self.packset):
             raise NotAProfitableLoop()
-        self.schedule()
+        self.schedule(True)
 
         gso = GuardStrengthenOpt(self.dependency_graph.index_vars)
         gso.propagate_all_forward(self.loop)
@@ -275,8 +275,6 @@
         loop = self.loop
         operations = loop.operations
 
-        self.tried_to_pack = True
-
         self.packset = PackSet(self.dependency_graph, operations,
                                self.unroll_count,
                                self.smallest_type_bytes)
@@ -356,17 +354,21 @@
             if len_before == len(self.packset.packs):
                 break
 
-    def schedule(self):
+    def schedule(self, vector=False):
         self.guard_early_exit = -1
         self.clear_newoperations()
         sched_data = 
VecScheduleData(self.metainterp_sd.cpu.vector_register_size)
         scheduler = Scheduler(self.dependency_graph, sched_data)
         renamer = Renamer()
+        #
+        if vector:
+            self.packset.accumulate_prepare(sched_data, renamer)
+        #
         while scheduler.has_more():
             position = len(self._newoperations)
             ops = scheduler.next(position)
             for op in ops:
-                if self.tried_to_pack:
+                if vector:
                     self.unpack_from_vector(op, sched_data, renamer)
                 self.emit_operation(op)
 
@@ -534,51 +536,6 @@
             self.accum_vars[pack.accum_variable] = pack.accum_variable
         self.packs.append(pack)
 
-    def accumulates_pair(self, lnode, rnode, origin_pack):
-        # lnode and rnode are isomorphic and dependent
-        assert isinstance(origin_pack, Pair)
-        lop = lnode.getoperation()
-        opnum = lop.getopnum()
-
-        if opnum in (rop.FLOAT_ADD, rop.INT_ADD):
-            roper = rnode.getoperation()
-            assert lop.numargs() == 2 and lop.result is not None
-            accum, accum_pos = self.getaccumulator_variable(lop, roper, 
origin_pack)
-            if not accum:
-                return None
-            # the dependency exists only because of the result of lnode
-            for dep in lnode.provides():
-                if dep.to is rnode:
-                    if not dep.because_of(accum):
-                        # not quite ... this is not handlable
-                        return None
-            # get the original variable
-            accum = lop.getarg(accum_pos)
-
-            # in either of the two cases the arguments are mixed,
-            # which is not handled currently
-            var_pos = (accum_pos + 1) % 2
-            plop = origin_pack.left.getoperation()
-            if lop.getarg(var_pos) is not plop.result:
-                return None
-            prop = origin_pack.right.getoperation()
-            if roper.getarg(var_pos) is not prop.result:
-                return None
-
-            # this can be handled by accumulation
-            return AccumPair(lnode, rnode, accum, accum_pos)
-
-        return None
-
-    def getaccumulator_variable(self, lop, rop, origin_pack):
-        args = rop.getarglist()
-        for i, arg in enumerate(args):
-            print arg, "is", lop.result
-            if arg is lop.result:
-                return arg, i
-        #
-        return None, -1
-
     def can_be_packed(self, lnode, rnode, origin_pack):
         if isomorphic(lnode.getoperation(), rnode.getoperation()):
             if lnode.independent(rnode):
@@ -645,3 +602,67 @@
             del self.packs[last_pos]
         return last_pos
 
+    def accumulates_pair(self, lnode, rnode, origin_pack):
+        # lnode and rnode are isomorphic and dependent
+        assert isinstance(origin_pack, Pair)
+        lop = lnode.getoperation()
+        opnum = lop.getopnum()
+
+        if opnum in (rop.FLOAT_ADD, rop.INT_ADD):
+            roper = rnode.getoperation()
+            assert lop.numargs() == 2 and lop.result is not None
+            accum, accum_pos = self.getaccumulator_variable(lop, roper, 
origin_pack)
+            if not accum:
+                return None
+            # the dependency exists only because of the result of lnode
+            for dep in lnode.provides():
+                if dep.to is rnode:
+                    if not dep.because_of(accum):
+                        # not quite ... this is not handlable
+                        return None
+            # get the original variable
+            accum = lop.getarg(accum_pos)
+
+            # in either of the two cases the arguments are mixed,
+            # which is not handled currently
+            var_pos = (accum_pos + 1) % 2
+            plop = origin_pack.left.getoperation()
+            if lop.getarg(var_pos) is not plop.result:
+                return None
+            prop = origin_pack.right.getoperation()
+            if roper.getarg(var_pos) is not prop.result:
+                return None
+
+            # this can be handled by accumulation
+            return AccumPair(lnode, rnode, accum, accum_pos)
+
+        return None
+
+    def getaccumulator_variable(self, lop, rop, origin_pack):
+        args = rop.getarglist()
+        for i, arg in enumerate(args):
+            if arg is lop.result:
+                return arg, i
+        #
+        return None, -1
+
+    def accumulate_prepare(self, sched_data, renamer):
+        for var, pos in self.accum_vars.items():
+            # create a new vector box for the parameters
+            box = vectorbox_outof_box(var)
+            op = ResOperation(rop.VEC_BOX, [ConstInt(0)], box)
+            sched_data.invariant_oplist.append(op)
+            result = box.clonebox()
+            # clear the box to zero
+            op = ResOperation(rop.VEC_INT_XOR, [box, box], result)
+            sched_data.invariant_oplist.append(op)
+            box = result
+            result = box.clonebox()
+            # pack the scalar value
+            op = ResOperation(getpackopnum(box.item_type),
+                              [box, var, ConstInt(0), ConstInt(1)], result)
+            sched_data.invariant_oplist.append(op)
+            # rename the variable with the box
+            renamer.start_renaming(var, result)
+
+
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to