Author: Richard Plangger <planri...@gmail.com>
Branch: vecopt-merge
Changeset: r79683:5190c354f531
Date: 2015-09-18 11:45 +0200
http://bitbucket.org/pypy/pypy/changeset/5190c354f531/

Log:    beating the integration tests, roughly half work already

diff --git a/rpython/jit/metainterp/optimizeopt/__init__.py b/rpython/jit/metainterp/optimizeopt/__init__.py
--- a/rpython/jit/metainterp/optimizeopt/__init__.py
+++ b/rpython/jit/metainterp/optimizeopt/__init__.py
@@ -32,9 +32,9 @@
 def build_opt_chain(metainterp_sd, enable_opts):
     optimizations = []
     unroll = 'unroll' in enable_opts    # 'enable_opts' is normally a dict
-    #if (metainterp_sd.cpu is not None and
-    #    not metainterp_sd.cpu.supports_guard_gc_type):
-    #    unroll = False
+    if (metainterp_sd.cpu is not None and
+        not metainterp_sd.cpu.supports_guard_gc_type):
+        unroll = False
     for name, opt in unroll_all_opts:
         if name in enable_opts:
             if opt is not None:
diff --git a/rpython/jit/metainterp/optimizeopt/dependency.py b/rpython/jit/metainterp/optimizeopt/dependency.py
--- a/rpython/jit/metainterp/optimizeopt/dependency.py
+++ b/rpython/jit/metainterp/optimizeopt/dependency.py
@@ -71,7 +71,11 @@
         if exclude_last:
             count -= 1
         while i < count: 
-            op = self.path[i].getoperation()
+            node = self.path[i]
+            if node.is_imaginary():
+                i += 1
+                continue
+            op = node.getoperation()
             if op.is_guard():
                 descr = op.getdescr()
                 if not descr:
@@ -189,9 +193,6 @@
                    isinstance(descr, compile.CompileLoopVersionDescr)
         return False
 
-    # TODO def is_guard_early_exit(self):
-    #    return self.op.getopnum() == rop.GUARD_EARLY_EXIT
-
     def loads_from_complex_object(self):
         return rop._ALWAYS_PURE_LAST <= self.op.getopnum() < rop._MALLOC_FIRST
 
@@ -1057,33 +1058,27 @@
         return self.constant - other.constant
 
     def emit_operations(self, opt, result_box=None):
-        box = self.var
+        var = self.var
         if self.is_identity():
-            return box
+            return var
         last_op = None
         if self.coefficient_mul != 1:
-            box_result = box.clonebox()
-            last_op = ResOperation(rop.INT_MUL, [box, 
ConstInt(self.coefficient_mul)], box_result)
-            opt.emit_operation(last_op)
-            box = box_result
+            args = [var, ConstInt(self.coefficient_mul)]
+            var = ResOperation(rop.INT_MUL, args)
+            opt.emit_operation(var)
         if self.coefficient_div != 1:
-            box_result = box.clonebox()
-            last_op = ResOperation(rop.INT_FLOORDIV, [box, 
ConstInt(self.coefficient_div)], box_result)
-            opt.emit_operation(last_op)
-            box = box_result
+            args = [var, ConstInt(self.coefficient_div)]
+            var = ResOperation(rop.INT_FLOORDIV, args)
+            opt.emit_operation(var)
         if self.constant > 0:
-            box_result = box.clonebox()
-            last_op = ResOperation(rop.INT_ADD, [box, 
ConstInt(self.constant)], box_result)
-            opt.emit_operation(last_op)
-            box = box_result
+            args = [var, ConstInt(self.constant)]
+            vec = ResOperation(rop.INT_ADD, args)
+            opt.emit_operation(vec)
         if self.constant < 0:
-            box_result = box.clonebox()
-            last_op = ResOperation(rop.INT_SUB, [box, 
ConstInt(self.constant)], box_result)
-            opt.emit_operation(last_op)
-            box = box_result
-        if result_box is not None:
-            last_op.result = box = result_box
-        return box
+            args = [var, ConstInt(self.constant)]
+            var = ResOperation(rop.INT_SUB, args)
+            opt.emit_operation(var)
+        return var 
 
     def compare(self, other):
         """ Returns if the two are compareable as a first result
diff --git a/rpython/jit/metainterp/optimizeopt/guard.py b/rpython/jit/metainterp/optimizeopt/guard.py
--- a/rpython/jit/metainterp/optimizeopt/guard.py
+++ b/rpython/jit/metainterp/optimizeopt/guard.py
@@ -133,15 +133,13 @@
 
     def emit_operations(self, opt):
         # create trace instructions for the index
-        box_lhs = self.emit_varops(opt, self.lhs, self.cmp_op.getarg(0))
-        box_rhs = self.emit_varops(opt, self.rhs, self.cmp_op.getarg(1))
-        box_result = self.cmp_op.result.clonebox()
+        lhs = self.emit_varops(opt, self.lhs, self.cmp_op.getarg(0))
+        rhs = self.emit_varops(opt, self.rhs, self.cmp_op.getarg(1))
         opnum = self.cmp_op.getopnum()
-        cmp_op = ResOperation(opnum, [box_lhs, box_rhs], box_result)
+        cmp_op = ResOperation(opnum, [lhs, rhs])
         opt.emit_operation(cmp_op)
         # emit that actual guard
-        guard = self.op.clone()
-        guard.setarg(0, box_result)
+        guard = ResOperation(self.op.getopnum(), [cmp_op], self.op.getdescr())
         opt.emit_operation(guard)
         self.setindex(opt.operation_position()-1)
         self.setoperation(guard)
@@ -179,13 +177,12 @@
 
 class GuardStrengthenOpt(object):
     """ Note that this optimization is only used in the vector optimizer (yet) 
"""
-    def __init__(self, index_vars, has_two_labels):
+    def __init__(self, index_vars):
         self.index_vars = index_vars
         self._newoperations = []
         self.strength_reduced = 0 # how many guards could be removed?
         self.strongest_guards = {}
         self.guards = {}
-        self.has_two_labels = has_two_labels
 
     def collect_guard_information(self, loop):
         operations = loop.operations
@@ -251,11 +248,11 @@
                 else:
                     self.emit_operation(op)
                     continue
-            if op.result:
-                index_var = self.index_vars.get(op.result, None)
+            if not op.returns_void():
+                index_var = self.index_vars.get(op, None)
                 if index_var:
                     if not index_var.is_identity():
-                        index_var.emit_operations(self, op.result)
+                        index_var.emit_operations(self, op)
                         continue
             self.emit_operation(op)
         #
diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py b/rpython/jit/metainterp/optimizeopt/schedule.py
--- a/rpython/jit/metainterp/optimizeopt/schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/schedule.py
@@ -124,7 +124,6 @@
         """ Emit all the operations into the oplist parameter.
             Initiates the scheduling. """
         assert isinstance(state, SchedulerState)
-        import pdb; pdb.set_trace()
         while state.has_more():
             node = self.next(state)
             if node:
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_dependency.py b/rpython/jit/metainterp/optimizeopt/test/test_dependency.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_dependency.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_dependency.py
@@ -51,8 +51,10 @@
         else:
             label = loop.operations[0]
             label.setdescr(TargetToken(token))
-        loop = VectorLoop(label, loop.operations[0:-1], loop.operations[-1])
+        jump = loop.operations[-1]
+        loop = VectorLoop(label, loop.operations[0:-1], jump)
         loop.jump.setdescr(token)
+        # TODO
         for op in loop.operations:
             if op.getopnum() == rop.GUARD_EARLY_EXIT and op.getdescr() is None:
                 op.setdescr(ResumeAtLoopHeaderDescr())
@@ -184,10 +186,6 @@
         assert not m1.is_adjacent_to(m2)
         assert not m2.is_adjacent_to(m1)
 
-    def getmemref(self, idx):
-        node = self.last_graph.getnode(idx)
-        return self.last_graph.memory_refs[node]
-
 class BaseTestDependencyGraph(DependencyBaseTest):
 
     def test_index_var_basic(self):
@@ -264,9 +262,9 @@
         jump() # 4:
         """
         graph = self.assert_dependencies(ops, full_check=True)
-        self.assert_dependent(1,2)
-        self.assert_dependent(2,3)
-        self.assert_dependent(1,3)
+        self.assert_dependent(graph, 1,2)
+        self.assert_dependent(graph, 2,3)
+        self.assert_dependent(graph, 1,3)
 
     def test_def_use_jump_use_def(self):
         ops = """
@@ -417,7 +415,7 @@
         jump(p0, i1, i2) # 3:
         """
         self.assert_dependencies(ops, full_check=True)
-        self.assert_dependent(1,2)
+        self.assert_dependent(graph, 1,2)
 
     def test_setarrayitem_dont_depend_with_memref_info(self):
         ops="""
@@ -457,7 +455,7 @@
         jump(i24, i19, i21, i3, i4, i5, i22, i7) # 21:
         """
         self.assert_dependencies(ops, full_check=False)
-        self.assert_dependent(2,12)
+        self.assert_dependent(graph, 2,12)
 
     def test_getfield(self):
         trace = """
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_vecopt.py b/rpython/jit/metainterp/optimizeopt/test/test_vecopt.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_vecopt.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_vecopt.py
@@ -11,20 +11,31 @@
 import rpython.jit.metainterp.optimizeopt.optimizer as optimizeopt
 import rpython.jit.metainterp.optimizeopt.virtualize as virtualize
 from rpython.jit.metainterp.optimizeopt.dependency import DependencyGraph
-from rpython.jit.metainterp.optimizeopt.vector import (VectorizingOptimizer, 
MemoryRef,
-        isomorphic, Pair, NotAVectorizeableLoop, NotAProfitableLoop, 
GuardStrengthenOpt,
-        CostModel, VectorLoop)
-from rpython.jit.metainterp.optimizeopt.schedule import (Scheduler, 
SchedulerState)
+from rpython.jit.metainterp.optimizeopt.vector import (VectorizingOptimizer,
+        MemoryRef, isomorphic, Pair, NotAVectorizeableLoop, VectorLoop,
+        NotAProfitableLoop, GuardStrengthenOpt, CostModel, X86_CostModel)
+from rpython.jit.metainterp.optimizeopt.schedule import (Scheduler,
+        SchedulerState, VecScheduleState)
 from rpython.jit.metainterp.optimize import InvalidLoop
 from rpython.jit.metainterp import compile
 from rpython.jit.metainterp.resoperation import rop, ResOperation
+from rpython.jit.metainterp.optimizeopt.version import LoopVersionInfo
 
 class FakeJitDriverStaticData(object):
     vec=True
 
+class FakeLoopInfo(LoopVersionInfo):
+    def __init__(self, loop):
+        self.target_token = loop.label.getdescr()
+        self.label_op = loop.label
+        self.insert_index = -1
+        self.versions = []
+        self.leads_to = {}
+        self.descrs = []
+
 class FakeCostModel(CostModel):
-    def __init__(self):
-        CostModel.__init__(self, 0, 16)
+    def __init__(self, cpu):
+        CostModel.__init__(self, cpu, 16)
     def record_cast_int(self): pass
     def record_pack_savings(self, pack, times): pass
     def record_vector_pack(self, box, index, count): pass
@@ -34,6 +45,19 @@
     def profitable(self):
         return True
 
+def index_of_first(opnum, operations, pass_by=0):
+    for i,op in enumerate(operations):
+        if op.getopnum() == opnum:
+            if pass_by == 0:
+                return i
+            else:
+                pass_by -= 1
+    return -1
+
+def find_first_index(loop, opnum, pass_by=0):
+    """ return the first index of the operation having the same opnum or -1 """
+    return index_of_first(opnum, loop.operations, pass_by)
+
 ARCH_VEC_REG_SIZE = 16
 
 class VecTestHelper(DependencyBaseTest):
@@ -43,7 +67,11 @@
     jitdriver_sd = FakeJitDriverStaticData()
 
     def assert_vectorize(self, loop, expected_loop, call_pure_results=None):
-        self._do_optimize_loop(loop)
+        jump = ResOperation(rop.LABEL, loop.jump.getarglist(), 
loop.jump.getdescr())
+        compile_data = compile.LoopCompileData(loop.label, jump, 
loop.operations)
+        state = self._do_optimize_loop(compile_data)
+        loop.label = state[0].label_op
+        loop.opererations = state[1]
         self.assert_equal(loop, expected_loop)
 
     def vectoroptimizer(self, loop):
@@ -56,13 +84,11 @@
     def earlyexit(self, loop):
         opt = self.vectoroptimizer(loop)
         graph = opt.analyse_index_calculations(loop)
-        graph.view()
         state = SchedulerState(graph)
         opt.schedule(state)
         return graph.loop
 
     def vectoroptimizer_unrolled(self, loop, unroll_factor = -1):
-        loop.snapshot()
         opt = self.vectoroptimizer(loop)
         opt.linear_find_smallest_type(loop)
         if unroll_factor == -1 and opt.smallest_type_bytes == 0:
@@ -71,76 +97,84 @@
             unroll_factor = opt.get_unroll_count(ARCH_VEC_REG_SIZE)
             print ""
             print "unroll factor: ", unroll_factor, opt.smallest_type_bytes
-        if opt.loop.find_first_index(rop.GUARD_EARLY_EXIT) == -1:
-            idx = loop.find_first_index(rop.LABEL)
-            guard = ResOperation(rop.GUARD_EARLY_EXIT, [], None)
-            guard.setfailargs([])
-            guard.setdescr(compile.ResumeAtLoopHeaderDescr())
-            loop.operations.insert(idx+1, guard)
-        self.show_dot_graph(DependencyGraph(opt.loop), "original_" + 
self.test_name)
-        graph = opt.analyse_index_calculations()
+        # TODO if opt.loop.find_first_index(rop.GUARD_EARLY_EXIT) == -1:
+        #    idx = loop.find_first_index(rop.LABEL)
+        #    guard = ResOperation(rop.GUARD_EARLY_EXIT, [], None)
+        #    guard.setfailargs([])
+        #    guard.setdescr(compile.ResumeAtLoopHeaderDescr())
+        #    loop.operations.insert(idx+1, guard)
+        self.show_dot_graph(DependencyGraph(loop), "original_" + 
self.test_name)
+        graph = opt.analyse_index_calculations(loop)
         if graph is not None:
-            cycle = opt.dependency_graph.cycles()
+            cycle = graph.cycles()
             if cycle is not None:
                 print "CYCLE found %s" % cycle
-            self.show_dot_graph(opt.dependency_graph, "early_exit_" + 
self.test_name)
+            self.show_dot_graph(graph, "early_exit_" + self.test_name)
             assert cycle is None
-            loop.operations = opt.schedule(False)
+            state = SchedulerState(graph)
+            opt.schedule(state)
         opt.unroll_loop_iterations(loop, unroll_factor)
-        opt.loop.operations = opt.get_newoperations()
-        self.debug_print_operations(opt.loop)
-        opt.clear_newoperations()
+        self.debug_print_operations(loop)
         graph = DependencyGraph(loop)
-        self.last_graph = graph
-        self.show_dot_graph(self.last_graph, self.test_name)
+        self.last_graph = graph # legacy for test_dependency
+        self.show_dot_graph(graph, self.test_name)
+        def gmr(i):
+            return graph.memory_refs[graph.nodes[i]]
+        graph.getmemref = gmr
         return opt, graph
 
     def init_packset(self, loop, unroll_factor = -1):
         opt, graph = self.vectoroptimizer_unrolled(loop, unroll_factor)
         opt.find_adjacent_memory_refs(graph)
-        return opt
+        return opt, graph
 
     def extend_packset(self, loop, unroll_factor = -1):
         opt, graph = self.vectoroptimizer_unrolled(loop, unroll_factor)
         opt.find_adjacent_memory_refs(graph)
         opt.extend_packset()
-        return opt
+        return opt, graph
 
     def combine_packset(self, loop, unroll_factor = -1):
         opt, graph = self.vectoroptimizer_unrolled(loop, unroll_factor)
         opt.find_adjacent_memory_refs(graph)
         opt.extend_packset()
         opt.combine_packset()
-        return opt
+        return opt, graph
 
     def schedule(self, loop, unroll_factor = -1, with_guard_opt=False):
+        info = FakeLoopInfo(loop)
+        info.snapshot(loop.operations + [loop.jump], loop.label)
         opt, graph = self.vectoroptimizer_unrolled(loop, unroll_factor)
-        opt.costmodel = FakeCostModel()
         opt.find_adjacent_memory_refs(graph)
         opt.extend_packset()
         opt.combine_packset()
-        opt.schedule(graph, True)
+        costmodel = FakeCostModel(self.cpu)
+        state = VecScheduleState(graph, opt.packset, self.cpu, costmodel)
+        opt.schedule(state)
         if with_guard_opt:
-            gso = GuardStrengthenOpt(opt.dependency_graph.index_vars, 
opt.has_two_labels)
-            gso.propagate_all_forward(opt.loop)
+            gso = GuardStrengthenOpt(graph.index_vars)
+            gso.propagate_all_forward(info, loop)
         return opt
 
     def vectorize(self, loop, unroll_factor = -1):
-        opt = self.vectoroptimizer_unrolled(loop, unroll_factor)
-        opt.find_adjacent_memory_refs()
+        info = FakeLoopInfo(loop)
+        info.snapshot(loop.operations + [loop.jump], loop.label)
+        opt, graph = self.vectoroptimizer_unrolled(loop, unroll_factor)
+        opt.find_adjacent_memory_refs(graph)
         opt.extend_packset()
         opt.combine_packset()
-        opt.costmodel.reset_savings()
-        opt.schedule(True)
-        if not opt.costmodel.profitable():
+        costmodel = X86_CostModel(self.cpu, 0)
+        state = VecScheduleState(graph, opt.packset, self.cpu, costmodel)
+        opt.schedule(state)
+        if not costmodel.profitable():
             raise NotAProfitableLoop()
-        gso = GuardStrengthenOpt(opt.dependency_graph.index_vars, 
opt.has_two_labels)
-        gso.propagate_all_forward(opt.loop)
+        gso = GuardStrengthenOpt(graph.index_vars)
+        gso.propagate_all_forward(info, loop)
         return opt
 
     def assert_unroll_loop_equals(self, loop, expected_loop, \
                      unroll_factor = -1):
-        vectoroptimizer = self.vectoroptimizer_unrolled(loop, unroll_factor)
+        self.vectoroptimizer_unrolled(loop, unroll_factor)
         self.assert_equal(loop, expected_loop)
 
     def assert_pack(self, pack, indices):
@@ -171,23 +205,24 @@
 
     def assert_packset_not_contains_pair(self, packset, x, y):
         for pack in packset.packs:
-            if pack.left.opidx == x and \
-               pack.right.opidx == y:
+            if pack.leftmost(node=True).opidx == x and \
+               pack.rightmost(node=True).opidx == y:
                 pytest.fail("must not find packset with indices {x},{y}" \
                                 .format(x=x,y=y))
 
     def assert_packset_contains_pair(self, packset, x, y):
         for pack in packset.packs:
             if isinstance(pack, Pair):
-                if pack.left.opidx == x and \
-                   pack.right.opidx == y:
+                if pack.leftmost(node=True).opidx == x and \
+                   pack.rightmost(node=True).opidx == y:
                     break
         else:
             pytest.fail("can't find a pack set for indices {x},{y}" \
                             .format(x=x,y=y))
-    def assert_has_memory_ref_at(self, idx):
-        node = self.last_graph.nodes[idx]
-        assert node in self.last_graph.memory_refs, \
+    def assert_has_memory_ref_at(self, graph, idx):
+        idx -= 1 # label is not in the nodes
+        node = graph.nodes[idx]
+        assert node in graph.memory_refs, \
             "operation %s at pos %d has no memory ref!" % \
                 (node.getoperation(), node.getindex())
 
@@ -248,7 +283,7 @@
         """ it currently rejects pointer arrays """
         ops = """
         [p0,i0]
-        raw_load_r(p0,i0,descr=arraydescr2)
+        getarrayitem_gc_r(p0,i0,descr=arraydescr2)
         jump(p0,i0)
         """
         self.assert_vectorize(self.parse_loop(ops), self.parse_loop(ops))
@@ -257,9 +292,9 @@
         """ it currently rejects pointer arrays """
         ops = """
         [p0,i0]
-        i2 = getarrayitem_gc(p0,i0,descr=floatarraydescr)
+        i2 = getarrayitem_gc_i(p0,i0,descr=arraydescr)
         i1 = int_add(i0,1)
-        i3 = getarrayitem_gc(p0,i1,descr=floatarraydescr)
+        i3 = getarrayitem_gc_i(p0,i1,descr=arraydescr)
         i4 = int_add(i1,1)
         jump(p0,i4)
         """
@@ -267,11 +302,12 @@
         [p0,i0]
         i1 = int_add(i0,1)
         i2 = int_add(i0,2)
-        i3 = vec_getarrayitem_gc(p0,i0,2,descr=floatarraydescr)
+        v3[2xi64] = vec_getarrayitem_gc_i(p0,i0,descr=arraydescr)
         jump(p0,i2)
         """
-        vopt = self.vectorize(self.parse_loop(ops),0)
-        self.assert_equal(vopt.loop, self.parse_loop(opt))
+        loop = self.parse_loop(ops)
+        vopt = self.vectorize(loop,0)
+        self.assert_equal(loop, self.parse_loop(opt))
 
     def test_vect_unroll_char(self):
         """ a 16 byte vector register can hold 16 bytes thus 
@@ -306,14 +342,14 @@
         [p0,p1,p2,i0]
         i4 = int_add(i0, 1)
         i5 = int_le(i4, 10)
-        guard_true(i5) []
+        guard_true(i5) [p0,p1,p2,i0]
         i1 = raw_load_i(p1, i0, descr=floatarraydescr)
         i2 = raw_load_i(p2, i0, descr=floatarraydescr)
         i3 = int_add(i1,i2)
         raw_store(p0, i0, i3, descr=floatarraydescr)
         i9 = int_add(i4, 1)
         i10 = int_le(i9, 10)
-        guard_true(i10) []
+        guard_true(i10) [p0,p1,p2,i4]
         i6 = raw_load_i(p1, i4, descr=floatarraydescr)
         i7 = raw_load_i(p2, i4, descr=floatarraydescr)
         i8 = int_add(i6,i7)
@@ -338,9 +374,9 @@
         raw_load_i(p0,i0,descr=arraydescr)
         jump(p0,i0)
         """
-        vopt = self.vectoroptimizer_unrolled(self.parse_loop(ops),0)
-        assert len(vopt.dependency_graph.memory_refs) == 1
-        self.assert_has_memory_ref_at(1)
+        vopt, graph = self.vectoroptimizer_unrolled(self.parse_loop(ops),0)
+        assert len(graph.memory_refs) == 1
+        self.assert_has_memory_ref_at(graph, 1)
 
     def test_array_operation_indices_unrolled_1(self):
         ops = """
@@ -348,10 +384,10 @@
         raw_load_i(p0,i0,descr=chararraydescr)
         jump(p0,i0)
         """
-        vopt = self.vectoroptimizer_unrolled(self.parse_loop(ops),1)
-        assert len(vopt.dependency_graph.memory_refs) == 2
-        self.assert_has_memory_ref_at(1)
-        self.assert_has_memory_ref_at(2)
+        vopt, graph = self.vectoroptimizer_unrolled(self.parse_loop(ops),1)
+        assert len(graph.memory_refs) == 2
+        self.assert_has_memory_ref_at(graph, 1)
+        self.assert_has_memory_ref_at(graph, 2)
 
     def test_array_operation_indices_unrolled_2(self):
         ops = """
@@ -361,20 +397,20 @@
         jump(p0,i3,i4)
         """
         loop = self.parse_loop(ops)
-        vopt = self.vectoroptimizer_unrolled(loop,0)
-        assert len(vopt.dependency_graph.memory_refs) == 2
-        self.assert_has_memory_ref_at(1)
-        self.assert_has_memory_ref_at(2)
+        vopt, graph = self.vectoroptimizer_unrolled(loop,0)
+        assert len(graph.memory_refs) == 2
+        self.assert_has_memory_ref_at(graph, 1)
+        self.assert_has_memory_ref_at(graph, 2)
         #
-        vopt = self.vectoroptimizer_unrolled(self.parse_loop(ops),1)
-        assert len(vopt.dependency_graph.memory_refs) == 4
+        vopt, graph = self.vectoroptimizer_unrolled(self.parse_loop(ops),1)
+        assert len(graph.memory_refs) == 4
         for i in [1,2,3,4]:
-            self.assert_has_memory_ref_at(i)
+            self.assert_has_memory_ref_at(graph, i)
         #
-        vopt = self.vectoroptimizer_unrolled(self.parse_loop(ops),3)
-        assert len(vopt.dependency_graph.memory_refs) == 8
+        vopt, graph = self.vectoroptimizer_unrolled(self.parse_loop(ops),3)
+        assert len(graph.memory_refs) == 8
         for i in [1,2,3,4,5,6,7,8]:
-            self.assert_has_memory_ref_at(i)
+            self.assert_has_memory_ref_at(graph, i)
 
     def test_array_memory_ref_adjacent_1(self):
         ops = """
@@ -384,12 +420,12 @@
         jump(p0,i1)
         """
         loop = self.parse_loop(ops)
-        vopt = self.vectoroptimizer_unrolled(loop,1)
-        vopt.find_adjacent_memory_refs()
-        assert len(vopt.dependency_graph.memory_refs) == 2
+        vopt, graph = self.vectoroptimizer_unrolled(loop,1)
+        vopt.find_adjacent_memory_refs(graph)
+        assert len(graph.memory_refs) == 2
 
-        mref1 = self.getmemref(loop.find_first_index(rop.RAW_LOAD))
-        mref3 = self.getmemref(loop.find_first_index(rop.RAW_LOAD,1))
+        mref1 = graph.getmemref(find_first_index(loop, rop.RAW_LOAD_I))
+        mref3 = graph.getmemref(find_first_index(loop, rop.RAW_LOAD_I,1))
         assert isinstance(mref1, MemoryRef)
         assert isinstance(mref3, MemoryRef)
 
@@ -402,9 +438,9 @@
         i3 = raw_load_i(p0,i0,descr=chararraydescr)
         jump(p0,i0)
         """
-        vopt = self.vectoroptimizer_unrolled(self.parse_loop(ops),0)
-        vopt.find_adjacent_memory_refs()
-        mref1 = self.getmemref(1)
+        vopt, graph = self.vectoroptimizer_unrolled(self.parse_loop(ops),0)
+        vopt.find_adjacent_memory_refs(graph)
+        mref1 = graph.getmemref(0)
         assert isinstance(mref1, MemoryRef)
         assert mref1.index_var.coefficient_mul == 1
         assert mref1.index_var.constant == 0
@@ -416,9 +452,9 @@
         i3 = raw_load_i(p0,i1,descr=chararraydescr)
         jump(p0,i1)
         """
-        vopt = self.vectoroptimizer_unrolled(self.parse_loop(ops),0)
-        vopt.find_adjacent_memory_refs()
-        mref1 = self.getmemref(2)
+        vopt, graph = self.vectoroptimizer_unrolled(self.parse_loop(ops),0)
+        vopt.find_adjacent_memory_refs(graph)
+        mref1 = graph.getmemref(1)
         assert isinstance(mref1, MemoryRef)
         assert mref1.index_var.coefficient_mul == 1
         assert mref1.index_var.constant == 1
@@ -430,9 +466,9 @@
         i3 = raw_load_i(p0,i1,descr=chararraydescr)
         jump(p0,i1)
         """
-        vopt = self.vectoroptimizer_unrolled(self.parse_loop(ops),0)
-        vopt.find_adjacent_memory_refs()
-        mref1 = self.getmemref(2)
+        vopt, graph = self.vectoroptimizer_unrolled(self.parse_loop(ops),0)
+        vopt.find_adjacent_memory_refs(graph)
+        mref1 = graph.getmemref(1)
         assert isinstance(mref1, MemoryRef)
         assert mref1.index_var.coefficient_mul == 1
         assert mref1.index_var.constant == -1
@@ -445,9 +481,9 @@
         i3 = raw_load_i(p0,i2,descr=chararraydescr)
         jump(p0,i1)
         """
-        vopt = self.vectoroptimizer_unrolled(self.parse_loop(ops),0)
-        vopt.find_adjacent_memory_refs()
-        mref1 = self.getmemref(3)
+        vopt, graph = self.vectoroptimizer_unrolled(self.parse_loop(ops),0)
+        vopt.find_adjacent_memory_refs(graph)
+        mref1 = graph.getmemref(2)
         assert isinstance(mref1, MemoryRef)
         assert mref1.index_var.coefficient_mul == 3
         assert mref1.index_var.constant == 3
@@ -462,9 +498,9 @@
         i5 = raw_load_i(p0,i4,descr=chararraydescr)
         jump(p0,i4)
         """
-        vopt = self.vectoroptimizer_unrolled(self.parse_loop(ops),0)
-        vopt.find_adjacent_memory_refs()
-        mref1 = self.getmemref(5)
+        vopt, graph = self.vectoroptimizer_unrolled(self.parse_loop(ops),0)
+        vopt.find_adjacent_memory_refs(graph)
+        mref1 = graph.getmemref(4)
         assert isinstance(mref1, MemoryRef)
         assert mref1.index_var.coefficient_mul == 18
         assert mref1.index_var.constant == 48
@@ -480,9 +516,9 @@
         i7 = raw_load_i(p0,i6,descr=chararraydescr)
         jump(p0,i6)
         """
-        vopt = self.vectoroptimizer_unrolled(self.parse_loop(ops),0)
-        vopt.find_adjacent_memory_refs()
-        mref1 = self.getmemref(7)
+        vopt, graph = self.vectoroptimizer_unrolled(self.parse_loop(ops),0)
+        vopt.find_adjacent_memory_refs(graph)
+        mref1 = graph.getmemref(6)
         assert isinstance(mref1, MemoryRef)
         assert mref1.index_var.coefficient_mul == 1026
         assert mref1.index_var.coefficient_div == 1
@@ -498,9 +534,9 @@
         i5 = raw_load_i(p0,i4,descr=chararraydescr)
         jump(p0,i4)
         """
-        vopt = self.vectoroptimizer_unrolled(self.parse_loop(ops),0)
-        vopt.find_adjacent_memory_refs()
-        mref1 = self.getmemref(5)
+        vopt, graph = self.vectoroptimizer_unrolled(self.parse_loop(ops),0)
+        vopt.find_adjacent_memory_refs(graph)
+        mref1 = graph.getmemref(4)
         assert isinstance(mref1, MemoryRef)
         assert mref1.index_var.coefficient_mul == 6
         assert mref1.index_var.coefficient_div == 1
@@ -516,16 +552,16 @@
         jump(p0,i1,i6)
         """
         loop = self.parse_loop(ops)
-        vopt = self.vectoroptimizer_unrolled(loop,1)
-        vopt.find_adjacent_memory_refs()
+        vopt, graph = self.vectoroptimizer_unrolled(loop,1)
+        vopt.find_adjacent_memory_refs(graph)
 
-        f = lambda x: loop.find_first_index(rop.RAW_LOAD, x)
+        f = lambda x: find_first_index(loop, rop.RAW_LOAD_I, x)
         indices = [f(0),f(1),f(2),f(3)]
         for i in indices:
-            self.assert_has_memory_ref_at(i)
-        assert len(vopt.dependency_graph.memory_refs) == 4
+            self.assert_has_memory_ref_at(graph, i+1)
+        assert len(graph.memory_refs) == 4
 
-        mref1, mref3, mref5, mref7 = [self.getmemref(i) for i in indices]
+        mref1, mref3, mref5, mref7 = [graph.getmemref(i) for i in indices]
         assert isinstance(mref1, MemoryRef)
         assert isinstance(mref3, MemoryRef)
         assert isinstance(mref5, MemoryRef)
@@ -545,9 +581,9 @@
         i3 = raw_load_i(p0,i2,descr=chararraydescr)
         jump(p0,i2)
         """
-        vopt = self.vectoroptimizer_unrolled(self.parse_loop(ops),0)
-        vopt.find_adjacent_memory_refs()
-        mref = self.getmemref(3)
+        vopt, graph = self.vectoroptimizer_unrolled(self.parse_loop(ops),0)
+        vopt.find_adjacent_memory_refs(graph)
+        mref = graph.getmemref(2)
         assert mref.index_var.coefficient_div == 16
         ops = """
         [p0,i0]
@@ -556,9 +592,9 @@
         i3 = raw_load_i(p0,i2,descr=chararraydescr)
         jump(p0,i2)
         """
-        vopt = self.vectoroptimizer_unrolled(self.parse_loop(ops),0)
-        vopt.find_adjacent_memory_refs()
-        mref = self.getmemref(3)
+        vopt, graph = self.vectoroptimizer_unrolled(self.parse_loop(ops),0)
+        vopt.find_adjacent_memory_refs(graph)
+        mref = graph.getmemref(2)
         assert mref.index_var.coefficient_div == 2
         assert mref.index_var.constant == 4
         ops = """
@@ -571,10 +607,10 @@
         i6 = raw_load_i(p0,i5,descr=chararraydescr)
         jump(p0,i2)
         """
-        vopt = self.vectoroptimizer_unrolled(self.parse_loop(ops),0)
-        vopt.find_adjacent_memory_refs()
-        mref = self.getmemref(5)
-        mref2 = self.getmemref(6)
+        vopt, graph = self.vectoroptimizer_unrolled(self.parse_loop(ops),0)
+        vopt.find_adjacent_memory_refs(graph)
+        mref = graph.getmemref(2)
+        mref2 = graph.getmemref(5)
 
         self.assert_memory_ref_not_adjacent(mref, mref2)
         assert mref != mref2
@@ -591,10 +627,10 @@
         i7 = raw_load_i(p0,i6,descr=chararraydescr)
         jump(p0,i2)
         """
-        vopt = self.vectoroptimizer_unrolled(self.parse_loop(ops),0)
-        vopt.find_adjacent_memory_refs()
-        mref = self.getmemref(6)
-        mref2 = self.getmemref(7)
+        vopt, graph = self.vectoroptimizer_unrolled(self.parse_loop(ops),0)
+        vopt.find_adjacent_memory_refs(graph)
+        mref = graph.getmemref(2)
+        mref2 = graph.getmemref(6)
 
         self.assert_memory_ref_not_adjacent(mref, mref2)
         assert mref == mref2
@@ -611,10 +647,10 @@
         i7 = raw_load_i(p0,i6,descr=chararraydescr)
         jump(p0,i2)
         """
-        vopt = self.vectoroptimizer_unrolled(self.parse_loop(ops),0)
-        vopt.find_adjacent_memory_refs()
-        mref = self.getmemref(6)
-        mref2 = self.getmemref(7)
+        vopt, graph = self.vectoroptimizer_unrolled(self.parse_loop(ops),0)
+        vopt.find_adjacent_memory_refs(graph)
+        mref = graph.getmemref(2)
+        mref2 = graph.getmemref(6)
 
         self.assert_memory_ref_not_adjacent(mref, mref2)
         assert mref != mref2
@@ -622,17 +658,17 @@
     def test_packset_init_simple(self):
         ops = """
         [p0,i0]
-        i3 = getarrayitem_raw(p0, i0, descr=chararraydescr)
+        i3 = getarrayitem_raw_i(p0, i0, descr=chararraydescr)
         i1 = int_add(i0, 1)
         i2 = int_le(i1, 16)
         guard_true(i2) [p0, i0]
         jump(p0,i1)
         """
         loop = self.parse_loop(ops)
-        vopt = self.init_packset(loop,1)
+        vopt, graph = self.init_packset(loop,1)
         self.assert_independent(4,8)
         assert vopt.packset is not None
-        assert len(vopt.dependency_graph.memory_refs) == 2
+        assert len(graph.memory_refs) == 2
         assert len(vopt.packset.packs) == 1
 
     def test_packset_init_raw_load_not_adjacent_and_adjacent(self):
@@ -642,8 +678,8 @@
         jump(p0,i0)
         """
         loop = self.parse_loop(ops)
-        vopt = self.init_packset(loop,3)
-        assert len(vopt.dependency_graph.memory_refs) == 4
+        vopt, graph = self.init_packset(loop,3)
+        assert len(graph.memory_refs) == 4
         assert len(vopt.packset.packs) == 0
         ops = """
         [p0,i0]
@@ -652,8 +688,8 @@
         jump(p0,i2)
         """
         loop = self.parse_loop(ops)
-        vopt = self.init_packset(loop,3)
-        assert len(vopt.dependency_graph.memory_refs) == 4
+        vopt, graph = self.init_packset(loop,3)
+        assert len(graph.memory_refs) == 4
         assert len(vopt.packset.packs) == 3
         for i in range(3):
             x = (i+1)*2
@@ -667,24 +703,24 @@
         i1 = int_add(i0, 1)
         i2 = int_le(i1, 16)
         guard_true(i2) [p0, i0]
-        i3 = getarrayitem_raw(p0, i1, descr=chararraydescr)
+        i3 = getarrayitem_raw_i(p0, i1, descr=chararraydescr)
         jump(p0,i1)
         """
         loop = self.parse_loop(ops)
-        vopt = self.init_packset(loop,15)
-        assert len(vopt.dependency_graph.memory_refs) == 16
+        vopt, graph = self.init_packset(loop,15)
+        assert len(graph.memory_refs) == 16
         assert len(vopt.packset.packs) == 15
         # assure that memory refs are not adjacent for all
         for i in range(15):
             for j in range(15):
                 try:
                     if i-4 == j or i+4 == j:
-                        mref1 = self.getmemref(i)
-                        mref2 = self.getmemref(j)
+                        mref1 = graph.getmemref(i)
+                        mref2 = graph.getmemref(j)
                         assert mref1.is_adjacent_to(mref2)
                     else:
-                        mref1 = self.getmemref(i)
-                        mref2 = self.getmemref(j)
+                        mref1 = graph.getmemref(i)
+                        mref2 = graph.getmemref(j)
                         assert not mref1.is_adjacent_to(mref2)
                 except KeyError:
                     pass
@@ -697,25 +733,20 @@
     def test_isomorphic_operations(self):
         ops_src = """
         [p1,p0,i0]
-        i3 = getarrayitem_raw(p0, i0, descr=chararraydescr)
+        i3 = getarrayitem_raw_i(p0, i0, descr=chararraydescr)
         i1 = int_add(i0, 1)
         i2 = int_le(i1, 16)
-        i4 = getarrayitem_raw(p0, i1, descr=chararraydescr)
-        i5 = getarrayitem_raw(p1, i1, descr=floatarraydescr)
-        i6 = getarrayitem_raw(p0, i1, descr=floatarraydescr)
+        i4 = getarrayitem_raw_i(p0, i1, descr=chararraydescr)
+        f5 = getarrayitem_raw_f(p1, i1, descr=floatarraydescr)
+        f6 = getarrayitem_raw_f(p0, i1, descr=floatarraydescr)
         guard_true(i2) [p0, i0]
         jump(p1,p0,i1)
         """
         loop = self.parse_loop(ops_src)
         ops = loop.operations
-        assert isomorphic(ops[1], ops[4])
+        assert isomorphic(ops[0], ops[3])
         assert not isomorphic(ops[0], ops[1])
         assert not isomorphic(ops[0], ops[5])
-        # TODO strong assumptions do hold here?
-        #assert not isomorphic(ops[4], ops[5])
-        #assert not isomorphic(ops[5], ops[6])
-        #assert not isomorphic(ops[4], ops[6])
-        #assert not isomorphic(ops[1], ops[6])
 
     def test_packset_extend_simple(self):
         ops = """
@@ -723,33 +754,33 @@
         i1 = int_add(i0, 1)
         i2 = int_le(i1, 16)
         guard_true(i2) [p0, i0]
-        i3 = getarrayitem_raw(p0, i1, descr=chararraydescr)
+        i3 = getarrayitem_raw_i(p0, i1, descr=chararraydescr)
         i4 = int_add(i3, 1)
         jump(p0,i1)
         """
         loop = self.parse_loop(ops)
-        vopt = self.extend_packset(loop,1)
-        assert len(vopt.dependency_graph.memory_refs) == 2
+        vopt, graph = self.extend_packset(loop,1)
+        assert len(graph.memory_refs) == 2
         self.assert_independent(5,10)
         assert len(vopt.packset.packs) == 2
-        self.assert_packset_empty(vopt.packset, len(loop.operations),
+        self.assert_packset_empty(vopt.packset,
+                                  len(loop.operations),
                                   [(5,10), (4,9)])
 
     def test_packset_extend_load_modify_store(self):
         ops = """
         [p0,i0]
-        guard_early_exit() []
         i1 = int_add(i0, 1)
         i2 = int_le(i1, 16)
         guard_true(i2) [p0, i0]
-        i3 = getarrayitem_raw(p0, i1, descr=chararraydescr)
+        i3 = getarrayitem_raw_i(p0, i1, descr=chararraydescr)
         i4 = int_mul(i3, 2)
         setarrayitem_raw(p0, i1, i4, descr=chararraydescr)
         jump(p0,i1)
         """
         loop = self.parse_loop(ops)
-        vopt = self.extend_packset(loop,1)
-        assert len(vopt.dependency_graph.memory_refs) == 4
+        vopt, graph = self.extend_packset(loop,1)
+        assert len(graph.memory_refs) == 4
         self.assert_independent(4,10)
         self.assert_independent(5,11)
         self.assert_independent(6,12)
@@ -763,15 +794,18 @@
                               ('int',2,   [(0,(2,4)),(1,(6,8))]),
                               ('singlefloat',1,[(0,(2,4,6,8))])])
     def test_packset_combine_simple(self,descr,packs,packidx):
+        suffix = '_i'
+        if 'float' in descr:
+            suffix = '_f'
         ops = """
         [p0,i0]
-        i3 = getarrayitem_raw(p0, i0, descr={descr}arraydescr)
+        i3 = getarrayitem_raw{suffix}(p0, i0, descr={descr}arraydescr)
         i1 = int_add(i0,1)
         jump(p0,i1)
-        """.format(descr=descr)
+        """.format(descr=descr,suffix=suffix)
         loop = self.parse_loop(ops)
-        vopt = self.combine_packset(loop,3)
-        assert len(vopt.dependency_graph.memory_refs) == 4
+        vopt, graph = self.combine_packset(loop,3)
+        assert len(graph.memory_refs) == 4
         assert len(vopt.packset.packs) == packs
         for i,t in packidx:
             self.assert_pack(vopt.packset.packs[i], t)
@@ -832,7 +866,6 @@
     def test_packset_vector_operation(self, op, descr, stride):
         ops = """
         [p0,p1,p2,i0]
-        guard_early_exit() []
         i1 = int_add(i0, {stride})
         i10 = int_le(i1, 128)
         guard_true(i10) []
@@ -864,7 +897,6 @@
     def test_schedule_vector_operation(self, op, descr, stride):
         ops = """
         [p0,p1,p2,i0] # 0
-        guard_early_exit() []
         i10 = int_le(i0, 128)  # 1, 8, 15, 22
         guard_true(i10) [p0,p1,p2,i0] # 2, 9, 16, 23
         i2 = getarrayitem_raw(p0, i0, descr={descr}arraydescr) # 3, 10, 17, 24
@@ -882,8 +914,8 @@
         i11 = int_le(i1, 128)
         guard_true(i11) []
         i12 = int_add(i1, {stride})
-        v1 = vec_getarrayitem_raw(p0, i0, 2, descr={descr}arraydescr)
-        v2 = vec_getarrayitem_raw(p1, i0, 2, descr={descr}arraydescr)
+        v1 = vec_getarrayitem_raw(p0, i0, descr={descr}arraydescr)
+        v2 = vec_getarrayitem_raw(p1, i0, descr={descr}arraydescr)
         v3 = {op}(v1,v2)
         vec_setarrayitem_raw(p2, i0, v3, descr={descr}arraydescr)
         jump(p0,p1,p2,i12)
@@ -895,7 +927,6 @@
     def test_vschedule_trace_1(self):
         ops = """
         [i0, i1, i2, i3, i4]
-        guard_early_exit() []
         i6 = int_mul(i0, 8)
         i7 = raw_load(i2, i6, descr=arraydescr)
         i8 = raw_load(i3, i6, descr=arraydescr)
@@ -928,7 +959,6 @@
     def test_collapse_index_guard_1(self):
         ops = """
         [p0,i0]
-        guard_early_exit() [p0,i0]
         i1 = getarrayitem_raw(p0, i0, descr=chararraydescr)
         i2 = int_add(i0, 1)
         i3 = int_lt(i2, 102)
@@ -949,7 +979,7 @@
         {dead_code}
         i500 = int_add(i0, 16)
         i501 = int_lt(i2, 102)
-        i1 = vec_getarrayitem_raw(p0, i0, 16, descr=chararraydescr)
+        v10[16xi8] = vec_getarrayitem_raw(p0, i0, descr=chararraydescr)
         jump(p0,i2)
         """.format(dead_code=dead_code)
         vopt = self.schedule(self.parse_loop(ops),15,with_guard_opt=True)
@@ -958,7 +988,6 @@
     def test_too_small_vector(self):
         ops = """
         [p0,i0]
-        guard_early_exit() [p0,i0]
         i1 = getarrayitem_raw(p0, 0, descr=chararraydescr) # constant index
         i2 = getarrayitem_raw(p0, 1, descr=chararraydescr) # constant index
         i4 = int_add(i1, i2)
@@ -976,7 +1005,6 @@
     def test_constant_expansion(self):
         ops = """
         [p0,i0]
-        guard_early_exit() [p0,i0]
         i1 = getarrayitem_raw(p0, i0, descr=floatarraydescr)
         i4 = int_sub(i1, 42)
         i3 = int_add(i0,1)
@@ -987,8 +1015,8 @@
         opt="""
         [p0,i0]
         label(p0,i0)
-        v3 = vec_int_expand(42, 2)
-        label(p0,i0,v3)
+        v3[2xf64] = vec_expand_f(42.0)
+        label(p0,i0,v3[2xf64])
         i20 = int_add(i0, 1)
         i30 = int_lt(i20, 10)
         i2 = int_add(i0, 2)
@@ -996,9 +1024,9 @@
         guard_true(i3) [p0,i0]
         i4 = int_add(i0, 2)
         i5 = int_lt(i2, 10)
-        v1 = vec_getarrayitem_raw(p0, i0, 2, descr=floatarraydescr)
-        v2 = vec_int_sub(v1, v3)
-        jump(p0,i2,v3)
+        v1[2xf64] = vec_getarrayitem_raw(p0, i0, descr=floatarraydescr)
+        v2[2xf64] = vec_int_sub(v1[2xf64], v3[2xf64])
+        jump(p0,i2,v3[2xf64])
         """
         vopt = self.vectorize(self.parse_loop(ops),1)
         self.assert_equal(vopt.loop, self.parse_loop(opt,add_label=False))
@@ -1006,7 +1034,6 @@
     def test_variable_expansion(self):
         ops = """
         [p0,i0,f3]
-        guard_early_exit() [p0,i0]
         f1 = getarrayitem_raw(p0, i0, descr=floatarraydescr)
         f4 = int_add(f1, f3)
         i3 = int_add(i0,1)
@@ -1017,8 +1044,8 @@
         opt="""
         [p0,i0,f3]
         label(p0,i0,f3)
-        v3 = vec_float_expand(f3,2)
-        label(p0,i0,f3,v3)
+        v3[2xf64] = vec_expand_f(f3)
+        label(p0,i0,f3,v3[2xf64])
         i20 = int_add(i0, 1)
         i30 = int_lt(i20, 10)
         i2 = int_add(i0, 2)
@@ -1026,9 +1053,9 @@
         guard_true(i3) [p0,i0,f3]
         i4 = int_add(i0, 2)
         i5 = int_lt(i2, 10)
-        v1 = vec_getarrayitem_raw(p0, i0, 2, descr=floatarraydescr)
-        v2 = vec_int_add(v1, v3)
-        jump(p0,i2,f3,v3)
+        v1[2xf64] = vec_getarrayitem_raw(p0, i0, descr=floatarraydescr)
+        v2[2xf64] = vec_int_add(v1[2xf64], v3[2xf64])
+        jump(p0,i2,f3,v3[2xf64])
         """
         vopt = self.vectorize(self.parse_loop(ops),1)
         self.assert_equal(vopt.loop, self.parse_loop(opt, add_label=False))
@@ -1036,7 +1063,6 @@
     def test_accumulate_basic(self):
         trace = """
         [p0, i0, f0]
-        guard_early_exit() [p0, i0, f0]
         f1 = raw_load(p0, i0, descr=floatarraydescr)
         f2 = float_add(f0, f1)
         i1 = int_add(i0, 8)
@@ -1063,7 +1089,6 @@
     def test_element_f45_in_guard_failargs(self):
         ops = """
         [p36, i28, p9, i37, p14, f34, p12, p38, f35, p39, i40, i41, p42, i43, i44, i21, i4, i0, i18]
-        guard_early_exit() [p38, p12, p9, p14, p39, i37, i44, f35, i40, p42, i43, f34, i28, p36, i41]
         f45 = raw_load(i21, i44, descr=floatarraydescr) 
         guard_not_invalidated() [p38, p12, p9, p14, f45, p39, i37, i44, f35, i40, p42, i43, None, i28, p36, i41]
         i46 = int_add(i44, 8) 
@@ -1107,7 +1132,6 @@
     def test_shrink_vector_size(self):
         ops = """
         [p0,p1,i1]
-        guard_early_exit() []
         f1 = getarrayitem_raw(p0, i1, descr=floatarraydescr)
         i2 = cast_float_to_singlefloat(f1)
         setarrayitem_raw(p1, i1, i2, descr=singlefloatarraydescr)
@@ -1143,7 +1167,6 @@
     def test_castup_arith_castdown(self):
         ops = """
         [p0,p1,p2,i0,i4]
-        guard_early_exit() []
         i10 = raw_load(p0, i0, descr=singlefloatarraydescr)
         i1 = int_add(i0, 4)
         i11 = raw_load(p1, i1, descr=singlefloatarraydescr)
@@ -1196,7 +1219,6 @@
     def test_truediv_abs_neg_float(self):
         ops = """
         [f9,p10,i11,p4,i12,p2,p5,p13,i14,p7,i15,p8,i16,f17,i18,i19]
-        guard_early_exit() [p8, p7, p5, p4, p2, f9, i12, i11, p10, i15, i14, p13]
         f20 = raw_load(i16, i12, descr=floatarraydescr)
         guard_not_invalidated() [p8, p7, p5, p4, p2, f20, None, i12, i11, p10, i15, i14, p13]
         i23 = int_add(i12, 8)
@@ -1216,7 +1238,6 @@
     def test_axis_sum(self):
         trace = """
         [i1, p10, i11, p8, i12, p3, p4, p13, i14, i15, p6, p9, i16, i17, i18, i19, i20, i21, i22, i23]
-        guard_early_exit() [i1, p9, p8, p6, p4, p3, i11, i15, p13, i12, i14, p10]
         f24 = raw_load(i16, i12, descr=floatarraydescr)
         guard_not_invalidated() [i1, p9, p8, p6, p4, p3, f24, i11, i15, p13, i12, i14, p10]
         i26 = int_add(i12, 8)
@@ -1246,7 +1267,6 @@
     def test_cast_1(self):
         trace = """
         [i9, i10, p2, p11, i12, i13, p4, p5, p14, i15, p8, i16, p17, i18, i19, i20, i21, i22, i23]
-        guard_early_exit() [p8, p5, p4, p2, p17, i13, i12, i10, i19, p14, p11, i18, i15, i16, i9]
         i24 = raw_load(i20, i16, descr=singlefloatarraydescr)
         guard_not_invalidated() [p8, p5, p4, p2, i24, p17, i13, i12, i10, i19, p14, p11, i18, i15, i16, None]
         i27 = int_add(i16, 4)
@@ -1269,7 +1289,6 @@
     def test_all_guard(self):
         trace = """
         [p0, p3, i4, i5, i6, i7]
-        guard_early_exit() [p0, p3, i5, i4]
         f8 = raw_load(i6, i5, descr=floatarraydescr)
         guard_not_invalidated() [p0, f8, p3, i5, i4]
         i9 = cast_float_to_int(f8)
@@ -1287,7 +1306,6 @@
     def test_max(self):
         trace = """
         [p3, i4, p2, i5, f6, i7, i8]
-        guard_early_exit() [p2, f6, i4, i5, p3]
         f9 = raw_load(i7, i5, descr=floatarraydescr)
         guard_not_invalidated() [p2, f9, f6, i4, i5, p3]
         i10 = float_ge(f6, f9)
@@ -1307,7 +1325,6 @@
     def test_abc(self):
         trace="""
         [p0, p1, p5, p6, p7, p12, p13, i14, i15, i16, i17, i18, i19, i20]
-        guard_early_exit() []
         debug_merge_point(0, 0, '<code object <module>. file '/home/rich/proj/da/thesis/bench/user1.py'. line 2> #117 LOAD_NAME')
         guard_not_invalidated(descr=<ResumeGuardNotInvalidated object at 0x7fc657d7be20>) [p1, p0, p5, p6, p7, p12, p13]
         debug_merge_point(0, 0, '<code object <module>. file '/home/rich/proj/da/thesis/bench/user1.py'. line 2> #120 LOAD_CONST')
@@ -1354,7 +1371,6 @@
     def test_bug1(self):
         trace="""
         [p0, p1, p6, p7, p11, i83, f57, f61, f65, f70, f78, f81, i48, i56, p46]
-        guard_early_exit(descr=<Guard0x7fa392d5c1a0>) [p1, p0, p6, p7, p11, f81, f78, f70, f65, f61, f57, i83]
         guard_not_invalidated(descr=<Guard0x7fa392d5c200>) [p1, p0, p6, p7, p11, f81, f78, f70, f65, f61, f57, i83]
         i91 = int_lt(i83, i48)
         guard_true(i91, descr=<Guard0x7fa392d5c260>) [p1, p0, p6, p7, p11, i48, f81, f78, f70, f65, f61, f57, i83]
@@ -1389,7 +1405,6 @@
     def test_1(self):
         trace = """
         [p0, p1, p6, p7, i13, p14, p15]
-        guard_early_exit(descr=<ResumeAtLoopHeaderDescr object at 0x7f89c54cdbe0>) [p1, p0, p6, p7, i13]
         guard_not_invalidated(descr=<ResumeGuardNotInvalidated object at 0x7f89c54cdc40>) [p1, p0, p6, p7, i13]
         i17 = int_lt(i13, 10000)
         guard_true(i17, descr=<ResumeGuardTrueDescr object at 0x7f89c54cdca0>) [p1, p0, p6, p7, i13]
diff --git a/rpython/jit/metainterp/optimizeopt/vector.py b/rpython/jit/metainterp/optimizeopt/vector.py
--- a/rpython/jit/metainterp/optimizeopt/vector.py
+++ b/rpython/jit/metainterp/optimizeopt/vector.py
@@ -176,7 +176,6 @@
         if vsize == 0 or byte_count == 0 or loop.label.getopnum() != rop.LABEL:
             # stop, there is no chance to vectorize this trace
             # we cannot optimize normal traces (if there is no label)
-            import pdb; pdb. set_trace()
             raise NotAVectorizeableLoop()
 
         # find index guards and move to the earliest position
@@ -188,8 +187,6 @@
         # unroll
         self.unroll_count = self.get_unroll_count(vsize)
         self.unroll_loop_iterations(loop, self.unroll_count)
-        loop.operations = self.get_newoperations()
-        self.clear_newoperations();
 
         # vectorize
         graph = DependencyGraph(loop)
@@ -210,8 +207,6 @@
     def unroll_loop_iterations(self, loop, unroll_count):
         """ Unroll the loop X times. unroll_count + 1 = unroll_factor """
         numops = len(loop.operations)
-        label_op = loop.label
-        jump_op = loop.jump
         # use the target token of the label
         #target_token = label_op.getdescr()
         #if not we_are_translated():
@@ -223,33 +218,32 @@
         #    jump_op.setdescr(target_token)
         #assert jump_op.is_final()
 
-        self.emit_unrolled_operation(label_op)
+        #self.emit_unrolled_operation(label_op)
+
+        #for i in range(0,numops):
+        #    op = loop.operations[i].copy()
+        #    if op.is_guard():
+        #        assert isinstance(op, GuardResOp)
+        #        failargs = renamer.rename_failargs(op, clone=True)
+        #        snapshot = renamer.rename_rd_snapshot(op.rd_snapshot, clone=True)
+        #        op.setfailargs(failargs)
+        #        op.rd_snapshot = snapshot
+        #    operations.append(op)
+        #    self.emit_unrolled_operation(op)
 
         renamer = Renamer()
-        operations = []
-        for i in range(1,numops-1):
-            op = loop.operations[i].copy()
-            if op.is_guard():
-                assert isinstance(op, GuardResOp)
-                failargs = renamer.rename_failargs(op, clone=True)
-                snapshot = renamer.rename_rd_snapshot(op.rd_snapshot, clone=True)
-                op.setfailargs(failargs)
-                op.rd_snapshot = snapshot
-            operations.append(op)
-            self.emit_unrolled_operation(op)
-
+        operations = loop.operations
+        unrolled = []
         prohibit_opnums = (rop.GUARD_FUTURE_CONDITION,
-                           rop.GUARD_EARLY_EXIT,
                            rop.GUARD_NOT_INVALIDATED)
-
-        orig_jump_args = jump_op.getarglist()[:]
+        orig_jump_args = loop.jump.getarglist()[:]
         # it is assumed that #label_args == #jump_args
         label_arg_count = len(orig_jump_args)
         for u in range(unroll_count):
             # fill the map with the renaming boxes. keys are boxes from the label
             for i in range(label_arg_count):
-                la = label_op.getarg(i)
-                ja = jump_op.getarg(i)
+                la = loop.label.getarg(i)
+                ja = loop.jump.getarg(i)
                 ja = renamer.rename_box(ja)
                 if la != ja:
                     renamer.start_renaming(la, ja)
@@ -284,17 +278,18 @@
                             renamer.rename_failargs(copied_op, clone=True)
                         copied_op.setfailargs(renamed_failargs)
                 #
-                self.emit_unrolled_operation(copied_op)
+                unrolled.append(copied_op)
 
         # the jump arguments have been changed
         # if label(iX) ... jump(i(X+1)) is called, at the next unrolled loop
         # must look like this: label(i(X+1)) ... jump(i(X+2))
-        args = jump_op.getarglist()
+        args = loop.jump.getarglist()
         for i, arg in enumerate(args):
             value = renamer.rename_box(arg)
-            jump_op.setarg(i, value)
+            loop.jump.setarg(i, value)
         #
-        self.emit_unrolled_operation(jump_op)
+        #self.emit_unrolled_operation(jump_op)
+        loop.operations = operations + unrolled
 
     def linear_find_smallest_type(self, loop):
         # O(#operations)
@@ -456,14 +451,7 @@
                     fail = True
                 check[left] = None
                 check[right] = None
-                accum = pack.accum
-                if accum:
-                    self.packset.accum_vars[accum.var] = accum.pos
-
-                print " %dx %s " % (len(pack.operations),
-                                    pack.operations[0].op.getopname())
-                if accum:
-                    print "   accumulates!"
+                print " ", pack
             if fail:
                 assert False
 
@@ -537,9 +525,9 @@
                     modify_later.append(prev_node)
                 else:
                     for path in prev_node.iterate_paths(None, backwards=True, blacklist=True):
-                        if not path.is_always_pure(exclude_first=True):
-                            path.set_schedule_priority(90)
+                        if not path.is_always_pure():
                             valid = False
+                        else:
                             if path.last() in zero_deps:
                                 del zero_deps[path.last()]
                     if not valid:
@@ -559,34 +547,25 @@
 
                 for node in zero_deps.keys():
                     earlyexit.edge_to(node)
-                # TODO self.relax_guard_to(guard_node, ee_guard_node)
+                self.mark_guard(guard_node, loop)
         if one_valid:
             return graph
         return None
 
-    def relax_guard_to(self, guard_node, other_node):
-        """ Relaxes a guard operation to an earlier guard. """
-        # clone this operation object. if the vectorizer is
-        # not able to relax guards, it won't leave behind a modified operation
-        tgt_op = guard_node.getoperation().clone()
-        guard_node.op = tgt_op
-
-        op = other_node.getoperation()
-        assert isinstance(tgt_op, GuardResOp)
+    def mark_guard(self, node, loop):
+        """ Marks this guard as an early exit! """
+        op = node.getoperation()
         assert isinstance(op, GuardResOp)
-        olddescr = op.getdescr()
         descr = None
-        guard_true_false = tgt_op.getopnum() in (rop.GUARD_TRUE, rop.GUARD_FALSE)
-        if guard_true_false:
+        if op.getopnum() in (rop.GUARD_TRUE, rop.GUARD_FALSE):
             descr = CompileLoopVersionDescr()
         else:
             descr = ResumeAtLoopHeaderDescr()
-        if olddescr:
-            descr.copy_all_attributes_from(olddescr)
+        if op.getdescr():
+            descr.copy_all_attributes_from(op.getdescr())
         #
-        tgt_op.setdescr(descr)
-        tgt_op.setfailargs(op.getfailargs()[:])
-
+        op.setdescr(descr)
+        op.setfailargs(loop.inputargs)
 
 class CostModel(object):
     """ Utility to estimate the savings for the new trace loop.
@@ -687,7 +666,6 @@
                 if forward and origin_pack.is_accumulating():
                     # in this case the splitted accumulator must
                     # be combined. This case is not supported
-                    import pdb; pdb. set_trace()
                     raise NotAVectorizeableLoop()
                 #
                 if self.contains_pair(lnode, rnode):
diff --git a/rpython/jit/metainterp/optimizeopt/version.py b/rpython/jit/metainterp/optimizeopt/version.py
--- a/rpython/jit/metainterp/optimizeopt/version.py
+++ b/rpython/jit/metainterp/optimizeopt/version.py
@@ -113,26 +113,26 @@
         jump.setdescr(token)
 
 
-def index_of_first(opnum, operations, pass_by=0):
-    """ returns the position of the first operation matching the opnum.
-    Or -1 if non is found
-    """
-    for i,op in enumerate(operations):
-        if op.getopnum() == opnum:
-            if pass_by == 0:
-                return i
-            else:
-                pass_by -= 1
-    return -1
+#def index_of_first(opnum, operations, pass_by=0):
+#    """ returns the position of the first operation matching the opnum.
+#    Or -1 if non is found
+#    """
+#    for i,op in enumerate(operations):
+#        if op.getopnum() == opnum:
+#            if pass_by == 0:
+#                return i
+#            else:
+#                pass_by -= 1
+#    return -1
+#
+#def find_first_index(self, opnum, pass_by=0):
+#    """ return the first index of the operation having the same opnum or -1 """
+#    return index_of_first(opnum, self.operations, pass_by)
+#
+#def find_first(self, opnum, pass_by=0):
+#    index = self.find_first_index(opnum, pass_by)
+#    if index != -1:
+#        return self.operations[index]
+#    return None
 
-def find_first_index(self, opnum, pass_by=0):
-    """ return the first index of the operation having the same opnum or -1 """
-    return index_of_first(opnum, self.operations, pass_by)
 
-def find_first(self, opnum, pass_by=0):
-    index = self.find_first_index(opnum, pass_by)
-    if index != -1:
-        return self.operations[index]
-    return None
-
-
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to