Author: Richard Plangger <planri...@gmail.com> Branch: vecopt-merge Changeset: r79683:5190c354f531 Date: 2015-09-18 11:45 +0200 http://bitbucket.org/pypy/pypy/changeset/5190c354f531/
Log: beating the integration tests, roughly half work already diff --git a/rpython/jit/metainterp/optimizeopt/__init__.py b/rpython/jit/metainterp/optimizeopt/__init__.py --- a/rpython/jit/metainterp/optimizeopt/__init__.py +++ b/rpython/jit/metainterp/optimizeopt/__init__.py @@ -32,9 +32,9 @@ def build_opt_chain(metainterp_sd, enable_opts): optimizations = [] unroll = 'unroll' in enable_opts # 'enable_opts' is normally a dict - #if (metainterp_sd.cpu is not None and - # not metainterp_sd.cpu.supports_guard_gc_type): - # unroll = False + if (metainterp_sd.cpu is not None and + not metainterp_sd.cpu.supports_guard_gc_type): + unroll = False for name, opt in unroll_all_opts: if name in enable_opts: if opt is not None: diff --git a/rpython/jit/metainterp/optimizeopt/dependency.py b/rpython/jit/metainterp/optimizeopt/dependency.py --- a/rpython/jit/metainterp/optimizeopt/dependency.py +++ b/rpython/jit/metainterp/optimizeopt/dependency.py @@ -71,7 +71,11 @@ if exclude_last: count -= 1 while i < count: - op = self.path[i].getoperation() + node = self.path[i] + if node.is_imaginary(): + i += 1 + continue + op = node.getoperation() if op.is_guard(): descr = op.getdescr() if not descr: @@ -189,9 +193,6 @@ isinstance(descr, compile.CompileLoopVersionDescr) return False - # TODO def is_guard_early_exit(self): - # return self.op.getopnum() == rop.GUARD_EARLY_EXIT - def loads_from_complex_object(self): return rop._ALWAYS_PURE_LAST <= self.op.getopnum() < rop._MALLOC_FIRST @@ -1057,33 +1058,27 @@ return self.constant - other.constant def emit_operations(self, opt, result_box=None): - box = self.var + var = self.var if self.is_identity(): - return box + return var last_op = None if self.coefficient_mul != 1: - box_result = box.clonebox() - last_op = ResOperation(rop.INT_MUL, [box, ConstInt(self.coefficient_mul)], box_result) - opt.emit_operation(last_op) - box = box_result + args = [var, ConstInt(self.coefficient_mul)] + var = ResOperation(rop.INT_MUL, args) + 
opt.emit_operation(var) if self.coefficient_div != 1: - box_result = box.clonebox() - last_op = ResOperation(rop.INT_FLOORDIV, [box, ConstInt(self.coefficient_div)], box_result) - opt.emit_operation(last_op) - box = box_result + args = [var, ConstInt(self.coefficient_div)] + var = ResOperation(rop.INT_FLOORDIV, args) + opt.emit_operation(var) if self.constant > 0: - box_result = box.clonebox() - last_op = ResOperation(rop.INT_ADD, [box, ConstInt(self.constant)], box_result) - opt.emit_operation(last_op) - box = box_result + args = [var, ConstInt(self.constant)] + vec = ResOperation(rop.INT_ADD, args) + opt.emit_operation(vec) if self.constant < 0: - box_result = box.clonebox() - last_op = ResOperation(rop.INT_SUB, [box, ConstInt(self.constant)], box_result) - opt.emit_operation(last_op) - box = box_result - if result_box is not None: - last_op.result = box = result_box - return box + args = [var, ConstInt(self.constant)] + var = ResOperation(rop.INT_SUB, args) + opt.emit_operation(var) + return var def compare(self, other): """ Returns if the two are compareable as a first result diff --git a/rpython/jit/metainterp/optimizeopt/guard.py b/rpython/jit/metainterp/optimizeopt/guard.py --- a/rpython/jit/metainterp/optimizeopt/guard.py +++ b/rpython/jit/metainterp/optimizeopt/guard.py @@ -133,15 +133,13 @@ def emit_operations(self, opt): # create trace instructions for the index - box_lhs = self.emit_varops(opt, self.lhs, self.cmp_op.getarg(0)) - box_rhs = self.emit_varops(opt, self.rhs, self.cmp_op.getarg(1)) - box_result = self.cmp_op.result.clonebox() + lhs = self.emit_varops(opt, self.lhs, self.cmp_op.getarg(0)) + rhs = self.emit_varops(opt, self.rhs, self.cmp_op.getarg(1)) opnum = self.cmp_op.getopnum() - cmp_op = ResOperation(opnum, [box_lhs, box_rhs], box_result) + cmp_op = ResOperation(opnum, [lhs, rhs]) opt.emit_operation(cmp_op) # emit that actual guard - guard = self.op.clone() - guard.setarg(0, box_result) + guard = ResOperation(self.op.getopnum(), [cmp_op], 
self.op.getdescr()) opt.emit_operation(guard) self.setindex(opt.operation_position()-1) self.setoperation(guard) @@ -179,13 +177,12 @@ class GuardStrengthenOpt(object): """ Note that this optimization is only used in the vector optimizer (yet) """ - def __init__(self, index_vars, has_two_labels): + def __init__(self, index_vars): self.index_vars = index_vars self._newoperations = [] self.strength_reduced = 0 # how many guards could be removed? self.strongest_guards = {} self.guards = {} - self.has_two_labels = has_two_labels def collect_guard_information(self, loop): operations = loop.operations @@ -251,11 +248,11 @@ else: self.emit_operation(op) continue - if op.result: - index_var = self.index_vars.get(op.result, None) + if not op.returns_void(): + index_var = self.index_vars.get(op, None) if index_var: if not index_var.is_identity(): - index_var.emit_operations(self, op.result) + index_var.emit_operations(self, op) continue self.emit_operation(op) # diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py b/rpython/jit/metainterp/optimizeopt/schedule.py --- a/rpython/jit/metainterp/optimizeopt/schedule.py +++ b/rpython/jit/metainterp/optimizeopt/schedule.py @@ -124,7 +124,6 @@ """ Emit all the operations into the oplist parameter. Initiates the scheduling. 
""" assert isinstance(state, SchedulerState) - import pdb; pdb.set_trace() while state.has_more(): node = self.next(state) if node: diff --git a/rpython/jit/metainterp/optimizeopt/test/test_dependency.py b/rpython/jit/metainterp/optimizeopt/test/test_dependency.py --- a/rpython/jit/metainterp/optimizeopt/test/test_dependency.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_dependency.py @@ -51,8 +51,10 @@ else: label = loop.operations[0] label.setdescr(TargetToken(token)) - loop = VectorLoop(label, loop.operations[0:-1], loop.operations[-1]) + jump = loop.operations[-1] + loop = VectorLoop(label, loop.operations[0:-1], jump) loop.jump.setdescr(token) + # TODO for op in loop.operations: if op.getopnum() == rop.GUARD_EARLY_EXIT and op.getdescr() is None: op.setdescr(ResumeAtLoopHeaderDescr()) @@ -184,10 +186,6 @@ assert not m1.is_adjacent_to(m2) assert not m2.is_adjacent_to(m1) - def getmemref(self, idx): - node = self.last_graph.getnode(idx) - return self.last_graph.memory_refs[node] - class BaseTestDependencyGraph(DependencyBaseTest): def test_index_var_basic(self): @@ -264,9 +262,9 @@ jump() # 4: """ graph = self.assert_dependencies(ops, full_check=True) - self.assert_dependent(1,2) - self.assert_dependent(2,3) - self.assert_dependent(1,3) + self.assert_dependent(graph, 1,2) + self.assert_dependent(graph, 2,3) + self.assert_dependent(graph, 1,3) def test_def_use_jump_use_def(self): ops = """ @@ -417,7 +415,7 @@ jump(p0, i1, i2) # 3: """ self.assert_dependencies(ops, full_check=True) - self.assert_dependent(1,2) + self.assert_dependent(graph, 1,2) def test_setarrayitem_dont_depend_with_memref_info(self): ops=""" @@ -457,7 +455,7 @@ jump(i24, i19, i21, i3, i4, i5, i22, i7) # 21: """ self.assert_dependencies(ops, full_check=False) - self.assert_dependent(2,12) + self.assert_dependent(graph, 2,12) def test_getfield(self): trace = """ diff --git a/rpython/jit/metainterp/optimizeopt/test/test_vecopt.py b/rpython/jit/metainterp/optimizeopt/test/test_vecopt.py --- 
a/rpython/jit/metainterp/optimizeopt/test/test_vecopt.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_vecopt.py @@ -11,20 +11,31 @@ import rpython.jit.metainterp.optimizeopt.optimizer as optimizeopt import rpython.jit.metainterp.optimizeopt.virtualize as virtualize from rpython.jit.metainterp.optimizeopt.dependency import DependencyGraph -from rpython.jit.metainterp.optimizeopt.vector import (VectorizingOptimizer, MemoryRef, - isomorphic, Pair, NotAVectorizeableLoop, NotAProfitableLoop, GuardStrengthenOpt, - CostModel, VectorLoop) -from rpython.jit.metainterp.optimizeopt.schedule import (Scheduler, SchedulerState) +from rpython.jit.metainterp.optimizeopt.vector import (VectorizingOptimizer, + MemoryRef, isomorphic, Pair, NotAVectorizeableLoop, VectorLoop, + NotAProfitableLoop, GuardStrengthenOpt, CostModel, X86_CostModel) +from rpython.jit.metainterp.optimizeopt.schedule import (Scheduler, + SchedulerState, VecScheduleState) from rpython.jit.metainterp.optimize import InvalidLoop from rpython.jit.metainterp import compile from rpython.jit.metainterp.resoperation import rop, ResOperation +from rpython.jit.metainterp.optimizeopt.version import LoopVersionInfo class FakeJitDriverStaticData(object): vec=True +class FakeLoopInfo(LoopVersionInfo): + def __init__(self, loop): + self.target_token = loop.label.getdescr() + self.label_op = loop.label + self.insert_index = -1 + self.versions = [] + self.leads_to = {} + self.descrs = [] + class FakeCostModel(CostModel): - def __init__(self): - CostModel.__init__(self, 0, 16) + def __init__(self, cpu): + CostModel.__init__(self, cpu, 16) def record_cast_int(self): pass def record_pack_savings(self, pack, times): pass def record_vector_pack(self, box, index, count): pass @@ -34,6 +45,19 @@ def profitable(self): return True +def index_of_first(opnum, operations, pass_by=0): + for i,op in enumerate(operations): + if op.getopnum() == opnum: + if pass_by == 0: + return i + else: + pass_by -= 1 + return -1 + +def 
find_first_index(loop, opnum, pass_by=0): + """ return the first index of the operation having the same opnum or -1 """ + return index_of_first(opnum, loop.operations, pass_by) + ARCH_VEC_REG_SIZE = 16 class VecTestHelper(DependencyBaseTest): @@ -43,7 +67,11 @@ jitdriver_sd = FakeJitDriverStaticData() def assert_vectorize(self, loop, expected_loop, call_pure_results=None): - self._do_optimize_loop(loop) + jump = ResOperation(rop.LABEL, loop.jump.getarglist(), loop.jump.getdescr()) + compile_data = compile.LoopCompileData(loop.label, jump, loop.operations) + state = self._do_optimize_loop(compile_data) + loop.label = state[0].label_op + loop.opererations = state[1] self.assert_equal(loop, expected_loop) def vectoroptimizer(self, loop): @@ -56,13 +84,11 @@ def earlyexit(self, loop): opt = self.vectoroptimizer(loop) graph = opt.analyse_index_calculations(loop) - graph.view() state = SchedulerState(graph) opt.schedule(state) return graph.loop def vectoroptimizer_unrolled(self, loop, unroll_factor = -1): - loop.snapshot() opt = self.vectoroptimizer(loop) opt.linear_find_smallest_type(loop) if unroll_factor == -1 and opt.smallest_type_bytes == 0: @@ -71,76 +97,84 @@ unroll_factor = opt.get_unroll_count(ARCH_VEC_REG_SIZE) print "" print "unroll factor: ", unroll_factor, opt.smallest_type_bytes - if opt.loop.find_first_index(rop.GUARD_EARLY_EXIT) == -1: - idx = loop.find_first_index(rop.LABEL) - guard = ResOperation(rop.GUARD_EARLY_EXIT, [], None) - guard.setfailargs([]) - guard.setdescr(compile.ResumeAtLoopHeaderDescr()) - loop.operations.insert(idx+1, guard) - self.show_dot_graph(DependencyGraph(opt.loop), "original_" + self.test_name) - graph = opt.analyse_index_calculations() + # TODO if opt.loop.find_first_index(rop.GUARD_EARLY_EXIT) == -1: + # idx = loop.find_first_index(rop.LABEL) + # guard = ResOperation(rop.GUARD_EARLY_EXIT, [], None) + # guard.setfailargs([]) + # guard.setdescr(compile.ResumeAtLoopHeaderDescr()) + # loop.operations.insert(idx+1, guard) + 
self.show_dot_graph(DependencyGraph(loop), "original_" + self.test_name) + graph = opt.analyse_index_calculations(loop) if graph is not None: - cycle = opt.dependency_graph.cycles() + cycle = graph.cycles() if cycle is not None: print "CYCLE found %s" % cycle - self.show_dot_graph(opt.dependency_graph, "early_exit_" + self.test_name) + self.show_dot_graph(graph, "early_exit_" + self.test_name) assert cycle is None - loop.operations = opt.schedule(False) + state = SchedulerState(graph) + opt.schedule(state) opt.unroll_loop_iterations(loop, unroll_factor) - opt.loop.operations = opt.get_newoperations() - self.debug_print_operations(opt.loop) - opt.clear_newoperations() + self.debug_print_operations(loop) graph = DependencyGraph(loop) - self.last_graph = graph - self.show_dot_graph(self.last_graph, self.test_name) + self.last_graph = graph # legacy for test_dependency + self.show_dot_graph(graph, self.test_name) + def gmr(i): + return graph.memory_refs[graph.nodes[i]] + graph.getmemref = gmr return opt, graph def init_packset(self, loop, unroll_factor = -1): opt, graph = self.vectoroptimizer_unrolled(loop, unroll_factor) opt.find_adjacent_memory_refs(graph) - return opt + return opt, graph def extend_packset(self, loop, unroll_factor = -1): opt, graph = self.vectoroptimizer_unrolled(loop, unroll_factor) opt.find_adjacent_memory_refs(graph) opt.extend_packset() - return opt + return opt, graph def combine_packset(self, loop, unroll_factor = -1): opt, graph = self.vectoroptimizer_unrolled(loop, unroll_factor) opt.find_adjacent_memory_refs(graph) opt.extend_packset() opt.combine_packset() - return opt + return opt, graph def schedule(self, loop, unroll_factor = -1, with_guard_opt=False): + info = FakeLoopInfo(loop) + info.snapshot(loop.operations + [loop.jump], loop.label) opt, graph = self.vectoroptimizer_unrolled(loop, unroll_factor) - opt.costmodel = FakeCostModel() opt.find_adjacent_memory_refs(graph) opt.extend_packset() opt.combine_packset() - opt.schedule(graph, 
True) + costmodel = FakeCostModel(self.cpu) + state = VecScheduleState(graph, opt.packset, self.cpu, costmodel) + opt.schedule(state) if with_guard_opt: - gso = GuardStrengthenOpt(opt.dependency_graph.index_vars, opt.has_two_labels) - gso.propagate_all_forward(opt.loop) + gso = GuardStrengthenOpt(graph.index_vars) + gso.propagate_all_forward(info, loop) return opt def vectorize(self, loop, unroll_factor = -1): - opt = self.vectoroptimizer_unrolled(loop, unroll_factor) - opt.find_adjacent_memory_refs() + info = FakeLoopInfo(loop) + info.snapshot(loop.operations + [loop.jump], loop.label) + opt, graph = self.vectoroptimizer_unrolled(loop, unroll_factor) + opt.find_adjacent_memory_refs(graph) opt.extend_packset() opt.combine_packset() - opt.costmodel.reset_savings() - opt.schedule(True) - if not opt.costmodel.profitable(): + costmodel = X86_CostModel(self.cpu, 0) + state = VecScheduleState(graph, opt.packset, self.cpu, costmodel) + opt.schedule(state) + if not costmodel.profitable(): raise NotAProfitableLoop() - gso = GuardStrengthenOpt(opt.dependency_graph.index_vars, opt.has_two_labels) - gso.propagate_all_forward(opt.loop) + gso = GuardStrengthenOpt(graph.index_vars) + gso.propagate_all_forward(info, loop) return opt def assert_unroll_loop_equals(self, loop, expected_loop, \ unroll_factor = -1): - vectoroptimizer = self.vectoroptimizer_unrolled(loop, unroll_factor) + self.vectoroptimizer_unrolled(loop, unroll_factor) self.assert_equal(loop, expected_loop) def assert_pack(self, pack, indices): @@ -171,23 +205,24 @@ def assert_packset_not_contains_pair(self, packset, x, y): for pack in packset.packs: - if pack.left.opidx == x and \ - pack.right.opidx == y: + if pack.leftmost(node=True).opidx == x and \ + pack.rightmost(node=True).opidx == y: pytest.fail("must not find packset with indices {x},{y}" \ .format(x=x,y=y)) def assert_packset_contains_pair(self, packset, x, y): for pack in packset.packs: if isinstance(pack, Pair): - if pack.left.opidx == x and \ - 
pack.right.opidx == y: + if pack.leftmost(node=True).opidx == x and \ + pack.rightmost(node=True).opidx == y: break else: pytest.fail("can't find a pack set for indices {x},{y}" \ .format(x=x,y=y)) - def assert_has_memory_ref_at(self, idx): - node = self.last_graph.nodes[idx] - assert node in self.last_graph.memory_refs, \ + def assert_has_memory_ref_at(self, graph, idx): + idx -= 1 # label is not in the nodes + node = graph.nodes[idx] + assert node in graph.memory_refs, \ "operation %s at pos %d has no memory ref!" % \ (node.getoperation(), node.getindex()) @@ -248,7 +283,7 @@ """ it currently rejects pointer arrays """ ops = """ [p0,i0] - raw_load_r(p0,i0,descr=arraydescr2) + getarrayitem_gc_r(p0,i0,descr=arraydescr2) jump(p0,i0) """ self.assert_vectorize(self.parse_loop(ops), self.parse_loop(ops)) @@ -257,9 +292,9 @@ """ it currently rejects pointer arrays """ ops = """ [p0,i0] - i2 = getarrayitem_gc(p0,i0,descr=floatarraydescr) + i2 = getarrayitem_gc_i(p0,i0,descr=arraydescr) i1 = int_add(i0,1) - i3 = getarrayitem_gc(p0,i1,descr=floatarraydescr) + i3 = getarrayitem_gc_i(p0,i1,descr=arraydescr) i4 = int_add(i1,1) jump(p0,i4) """ @@ -267,11 +302,12 @@ [p0,i0] i1 = int_add(i0,1) i2 = int_add(i0,2) - i3 = vec_getarrayitem_gc(p0,i0,2,descr=floatarraydescr) + v3[2xi64] = vec_getarrayitem_gc_i(p0,i0,descr=arraydescr) jump(p0,i2) """ - vopt = self.vectorize(self.parse_loop(ops),0) - self.assert_equal(vopt.loop, self.parse_loop(opt)) + loop = self.parse_loop(ops) + vopt = self.vectorize(loop,0) + self.assert_equal(loop, self.parse_loop(opt)) def test_vect_unroll_char(self): """ a 16 byte vector register can hold 16 bytes thus @@ -306,14 +342,14 @@ [p0,p1,p2,i0] i4 = int_add(i0, 1) i5 = int_le(i4, 10) - guard_true(i5) [] + guard_true(i5) [p0,p1,p2,i0] i1 = raw_load_i(p1, i0, descr=floatarraydescr) i2 = raw_load_i(p2, i0, descr=floatarraydescr) i3 = int_add(i1,i2) raw_store(p0, i0, i3, descr=floatarraydescr) i9 = int_add(i4, 1) i10 = int_le(i9, 10) - guard_true(i10) [] + 
guard_true(i10) [p0,p1,p2,i4] i6 = raw_load_i(p1, i4, descr=floatarraydescr) i7 = raw_load_i(p2, i4, descr=floatarraydescr) i8 = int_add(i6,i7) @@ -338,9 +374,9 @@ raw_load_i(p0,i0,descr=arraydescr) jump(p0,i0) """ - vopt = self.vectoroptimizer_unrolled(self.parse_loop(ops),0) - assert len(vopt.dependency_graph.memory_refs) == 1 - self.assert_has_memory_ref_at(1) + vopt, graph = self.vectoroptimizer_unrolled(self.parse_loop(ops),0) + assert len(graph.memory_refs) == 1 + self.assert_has_memory_ref_at(graph, 1) def test_array_operation_indices_unrolled_1(self): ops = """ @@ -348,10 +384,10 @@ raw_load_i(p0,i0,descr=chararraydescr) jump(p0,i0) """ - vopt = self.vectoroptimizer_unrolled(self.parse_loop(ops),1) - assert len(vopt.dependency_graph.memory_refs) == 2 - self.assert_has_memory_ref_at(1) - self.assert_has_memory_ref_at(2) + vopt, graph = self.vectoroptimizer_unrolled(self.parse_loop(ops),1) + assert len(graph.memory_refs) == 2 + self.assert_has_memory_ref_at(graph, 1) + self.assert_has_memory_ref_at(graph, 2) def test_array_operation_indices_unrolled_2(self): ops = """ @@ -361,20 +397,20 @@ jump(p0,i3,i4) """ loop = self.parse_loop(ops) - vopt = self.vectoroptimizer_unrolled(loop,0) - assert len(vopt.dependency_graph.memory_refs) == 2 - self.assert_has_memory_ref_at(1) - self.assert_has_memory_ref_at(2) + vopt, graph = self.vectoroptimizer_unrolled(loop,0) + assert len(graph.memory_refs) == 2 + self.assert_has_memory_ref_at(graph, 1) + self.assert_has_memory_ref_at(graph, 2) # - vopt = self.vectoroptimizer_unrolled(self.parse_loop(ops),1) - assert len(vopt.dependency_graph.memory_refs) == 4 + vopt, graph = self.vectoroptimizer_unrolled(self.parse_loop(ops),1) + assert len(graph.memory_refs) == 4 for i in [1,2,3,4]: - self.assert_has_memory_ref_at(i) + self.assert_has_memory_ref_at(graph, i) # - vopt = self.vectoroptimizer_unrolled(self.parse_loop(ops),3) - assert len(vopt.dependency_graph.memory_refs) == 8 + vopt, graph = 
self.vectoroptimizer_unrolled(self.parse_loop(ops),3) + assert len(graph.memory_refs) == 8 for i in [1,2,3,4,5,6,7,8]: - self.assert_has_memory_ref_at(i) + self.assert_has_memory_ref_at(graph, i) def test_array_memory_ref_adjacent_1(self): ops = """ @@ -384,12 +420,12 @@ jump(p0,i1) """ loop = self.parse_loop(ops) - vopt = self.vectoroptimizer_unrolled(loop,1) - vopt.find_adjacent_memory_refs() - assert len(vopt.dependency_graph.memory_refs) == 2 + vopt, graph = self.vectoroptimizer_unrolled(loop,1) + vopt.find_adjacent_memory_refs(graph) + assert len(graph.memory_refs) == 2 - mref1 = self.getmemref(loop.find_first_index(rop.RAW_LOAD)) - mref3 = self.getmemref(loop.find_first_index(rop.RAW_LOAD,1)) + mref1 = graph.getmemref(find_first_index(loop, rop.RAW_LOAD_I)) + mref3 = graph.getmemref(find_first_index(loop, rop.RAW_LOAD_I,1)) assert isinstance(mref1, MemoryRef) assert isinstance(mref3, MemoryRef) @@ -402,9 +438,9 @@ i3 = raw_load_i(p0,i0,descr=chararraydescr) jump(p0,i0) """ - vopt = self.vectoroptimizer_unrolled(self.parse_loop(ops),0) - vopt.find_adjacent_memory_refs() - mref1 = self.getmemref(1) + vopt, graph = self.vectoroptimizer_unrolled(self.parse_loop(ops),0) + vopt.find_adjacent_memory_refs(graph) + mref1 = graph.getmemref(0) assert isinstance(mref1, MemoryRef) assert mref1.index_var.coefficient_mul == 1 assert mref1.index_var.constant == 0 @@ -416,9 +452,9 @@ i3 = raw_load_i(p0,i1,descr=chararraydescr) jump(p0,i1) """ - vopt = self.vectoroptimizer_unrolled(self.parse_loop(ops),0) - vopt.find_adjacent_memory_refs() - mref1 = self.getmemref(2) + vopt, graph = self.vectoroptimizer_unrolled(self.parse_loop(ops),0) + vopt.find_adjacent_memory_refs(graph) + mref1 = graph.getmemref(1) assert isinstance(mref1, MemoryRef) assert mref1.index_var.coefficient_mul == 1 assert mref1.index_var.constant == 1 @@ -430,9 +466,9 @@ i3 = raw_load_i(p0,i1,descr=chararraydescr) jump(p0,i1) """ - vopt = self.vectoroptimizer_unrolled(self.parse_loop(ops),0) - 
vopt.find_adjacent_memory_refs() - mref1 = self.getmemref(2) + vopt, graph = self.vectoroptimizer_unrolled(self.parse_loop(ops),0) + vopt.find_adjacent_memory_refs(graph) + mref1 = graph.getmemref(1) assert isinstance(mref1, MemoryRef) assert mref1.index_var.coefficient_mul == 1 assert mref1.index_var.constant == -1 @@ -445,9 +481,9 @@ i3 = raw_load_i(p0,i2,descr=chararraydescr) jump(p0,i1) """ - vopt = self.vectoroptimizer_unrolled(self.parse_loop(ops),0) - vopt.find_adjacent_memory_refs() - mref1 = self.getmemref(3) + vopt, graph = self.vectoroptimizer_unrolled(self.parse_loop(ops),0) + vopt.find_adjacent_memory_refs(graph) + mref1 = graph.getmemref(2) assert isinstance(mref1, MemoryRef) assert mref1.index_var.coefficient_mul == 3 assert mref1.index_var.constant == 3 @@ -462,9 +498,9 @@ i5 = raw_load_i(p0,i4,descr=chararraydescr) jump(p0,i4) """ - vopt = self.vectoroptimizer_unrolled(self.parse_loop(ops),0) - vopt.find_adjacent_memory_refs() - mref1 = self.getmemref(5) + vopt, graph = self.vectoroptimizer_unrolled(self.parse_loop(ops),0) + vopt.find_adjacent_memory_refs(graph) + mref1 = graph.getmemref(4) assert isinstance(mref1, MemoryRef) assert mref1.index_var.coefficient_mul == 18 assert mref1.index_var.constant == 48 @@ -480,9 +516,9 @@ i7 = raw_load_i(p0,i6,descr=chararraydescr) jump(p0,i6) """ - vopt = self.vectoroptimizer_unrolled(self.parse_loop(ops),0) - vopt.find_adjacent_memory_refs() - mref1 = self.getmemref(7) + vopt, graph = self.vectoroptimizer_unrolled(self.parse_loop(ops),0) + vopt.find_adjacent_memory_refs(graph) + mref1 = graph.getmemref(6) assert isinstance(mref1, MemoryRef) assert mref1.index_var.coefficient_mul == 1026 assert mref1.index_var.coefficient_div == 1 @@ -498,9 +534,9 @@ i5 = raw_load_i(p0,i4,descr=chararraydescr) jump(p0,i4) """ - vopt = self.vectoroptimizer_unrolled(self.parse_loop(ops),0) - vopt.find_adjacent_memory_refs() - mref1 = self.getmemref(5) + vopt, graph = self.vectoroptimizer_unrolled(self.parse_loop(ops),0) + 
vopt.find_adjacent_memory_refs(graph) + mref1 = graph.getmemref(4) assert isinstance(mref1, MemoryRef) assert mref1.index_var.coefficient_mul == 6 assert mref1.index_var.coefficient_div == 1 @@ -516,16 +552,16 @@ jump(p0,i1,i6) """ loop = self.parse_loop(ops) - vopt = self.vectoroptimizer_unrolled(loop,1) - vopt.find_adjacent_memory_refs() + vopt, graph = self.vectoroptimizer_unrolled(loop,1) + vopt.find_adjacent_memory_refs(graph) - f = lambda x: loop.find_first_index(rop.RAW_LOAD, x) + f = lambda x: find_first_index(loop, rop.RAW_LOAD_I, x) indices = [f(0),f(1),f(2),f(3)] for i in indices: - self.assert_has_memory_ref_at(i) - assert len(vopt.dependency_graph.memory_refs) == 4 + self.assert_has_memory_ref_at(graph, i+1) + assert len(graph.memory_refs) == 4 - mref1, mref3, mref5, mref7 = [self.getmemref(i) for i in indices] + mref1, mref3, mref5, mref7 = [graph.getmemref(i) for i in indices] assert isinstance(mref1, MemoryRef) assert isinstance(mref3, MemoryRef) assert isinstance(mref5, MemoryRef) @@ -545,9 +581,9 @@ i3 = raw_load_i(p0,i2,descr=chararraydescr) jump(p0,i2) """ - vopt = self.vectoroptimizer_unrolled(self.parse_loop(ops),0) - vopt.find_adjacent_memory_refs() - mref = self.getmemref(3) + vopt, graph = self.vectoroptimizer_unrolled(self.parse_loop(ops),0) + vopt.find_adjacent_memory_refs(graph) + mref = graph.getmemref(2) assert mref.index_var.coefficient_div == 16 ops = """ [p0,i0] @@ -556,9 +592,9 @@ i3 = raw_load_i(p0,i2,descr=chararraydescr) jump(p0,i2) """ - vopt = self.vectoroptimizer_unrolled(self.parse_loop(ops),0) - vopt.find_adjacent_memory_refs() - mref = self.getmemref(3) + vopt, graph = self.vectoroptimizer_unrolled(self.parse_loop(ops),0) + vopt.find_adjacent_memory_refs(graph) + mref = graph.getmemref(2) assert mref.index_var.coefficient_div == 2 assert mref.index_var.constant == 4 ops = """ @@ -571,10 +607,10 @@ i6 = raw_load_i(p0,i5,descr=chararraydescr) jump(p0,i2) """ - vopt = self.vectoroptimizer_unrolled(self.parse_loop(ops),0) - 
vopt.find_adjacent_memory_refs() - mref = self.getmemref(5) - mref2 = self.getmemref(6) + vopt, graph = self.vectoroptimizer_unrolled(self.parse_loop(ops),0) + vopt.find_adjacent_memory_refs(graph) + mref = graph.getmemref(2) + mref2 = graph.getmemref(5) self.assert_memory_ref_not_adjacent(mref, mref2) assert mref != mref2 @@ -591,10 +627,10 @@ i7 = raw_load_i(p0,i6,descr=chararraydescr) jump(p0,i2) """ - vopt = self.vectoroptimizer_unrolled(self.parse_loop(ops),0) - vopt.find_adjacent_memory_refs() - mref = self.getmemref(6) - mref2 = self.getmemref(7) + vopt, graph = self.vectoroptimizer_unrolled(self.parse_loop(ops),0) + vopt.find_adjacent_memory_refs(graph) + mref = graph.getmemref(2) + mref2 = graph.getmemref(6) self.assert_memory_ref_not_adjacent(mref, mref2) assert mref == mref2 @@ -611,10 +647,10 @@ i7 = raw_load_i(p0,i6,descr=chararraydescr) jump(p0,i2) """ - vopt = self.vectoroptimizer_unrolled(self.parse_loop(ops),0) - vopt.find_adjacent_memory_refs() - mref = self.getmemref(6) - mref2 = self.getmemref(7) + vopt, graph = self.vectoroptimizer_unrolled(self.parse_loop(ops),0) + vopt.find_adjacent_memory_refs(graph) + mref = graph.getmemref(2) + mref2 = graph.getmemref(6) self.assert_memory_ref_not_adjacent(mref, mref2) assert mref != mref2 @@ -622,17 +658,17 @@ def test_packset_init_simple(self): ops = """ [p0,i0] - i3 = getarrayitem_raw(p0, i0, descr=chararraydescr) + i3 = getarrayitem_raw_i(p0, i0, descr=chararraydescr) i1 = int_add(i0, 1) i2 = int_le(i1, 16) guard_true(i2) [p0, i0] jump(p0,i1) """ loop = self.parse_loop(ops) - vopt = self.init_packset(loop,1) + vopt, graph = self.init_packset(loop,1) self.assert_independent(4,8) assert vopt.packset is not None - assert len(vopt.dependency_graph.memory_refs) == 2 + assert len(graph.memory_refs) == 2 assert len(vopt.packset.packs) == 1 def test_packset_init_raw_load_not_adjacent_and_adjacent(self): @@ -642,8 +678,8 @@ jump(p0,i0) """ loop = self.parse_loop(ops) - vopt = self.init_packset(loop,3) - assert 
len(vopt.dependency_graph.memory_refs) == 4 + vopt, graph = self.init_packset(loop,3) + assert len(graph.memory_refs) == 4 assert len(vopt.packset.packs) == 0 ops = """ [p0,i0] @@ -652,8 +688,8 @@ jump(p0,i2) """ loop = self.parse_loop(ops) - vopt = self.init_packset(loop,3) - assert len(vopt.dependency_graph.memory_refs) == 4 + vopt, graph = self.init_packset(loop,3) + assert len(graph.memory_refs) == 4 assert len(vopt.packset.packs) == 3 for i in range(3): x = (i+1)*2 @@ -667,24 +703,24 @@ i1 = int_add(i0, 1) i2 = int_le(i1, 16) guard_true(i2) [p0, i0] - i3 = getarrayitem_raw(p0, i1, descr=chararraydescr) + i3 = getarrayitem_raw_i(p0, i1, descr=chararraydescr) jump(p0,i1) """ loop = self.parse_loop(ops) - vopt = self.init_packset(loop,15) - assert len(vopt.dependency_graph.memory_refs) == 16 + vopt, graph = self.init_packset(loop,15) + assert len(graph.memory_refs) == 16 assert len(vopt.packset.packs) == 15 # assure that memory refs are not adjacent for all for i in range(15): for j in range(15): try: if i-4 == j or i+4 == j: - mref1 = self.getmemref(i) - mref2 = self.getmemref(j) + mref1 = graph.getmemref(i) + mref2 = graph.getmemref(j) assert mref1.is_adjacent_to(mref2) else: - mref1 = self.getmemref(i) - mref2 = self.getmemref(j) + mref1 = graph.getmemref(i) + mref2 = graph.getmemref(j) assert not mref1.is_adjacent_to(mref2) except KeyError: pass @@ -697,25 +733,20 @@ def test_isomorphic_operations(self): ops_src = """ [p1,p0,i0] - i3 = getarrayitem_raw(p0, i0, descr=chararraydescr) + i3 = getarrayitem_raw_i(p0, i0, descr=chararraydescr) i1 = int_add(i0, 1) i2 = int_le(i1, 16) - i4 = getarrayitem_raw(p0, i1, descr=chararraydescr) - i5 = getarrayitem_raw(p1, i1, descr=floatarraydescr) - i6 = getarrayitem_raw(p0, i1, descr=floatarraydescr) + i4 = getarrayitem_raw_i(p0, i1, descr=chararraydescr) + f5 = getarrayitem_raw_f(p1, i1, descr=floatarraydescr) + f6 = getarrayitem_raw_f(p0, i1, descr=floatarraydescr) guard_true(i2) [p0, i0] jump(p1,p0,i1) """ loop = 
self.parse_loop(ops_src) ops = loop.operations - assert isomorphic(ops[1], ops[4]) + assert isomorphic(ops[0], ops[3]) assert not isomorphic(ops[0], ops[1]) assert not isomorphic(ops[0], ops[5]) - # TODO strong assumptions do hold here? - #assert not isomorphic(ops[4], ops[5]) - #assert not isomorphic(ops[5], ops[6]) - #assert not isomorphic(ops[4], ops[6]) - #assert not isomorphic(ops[1], ops[6]) def test_packset_extend_simple(self): ops = """ @@ -723,33 +754,33 @@ i1 = int_add(i0, 1) i2 = int_le(i1, 16) guard_true(i2) [p0, i0] - i3 = getarrayitem_raw(p0, i1, descr=chararraydescr) + i3 = getarrayitem_raw_i(p0, i1, descr=chararraydescr) i4 = int_add(i3, 1) jump(p0,i1) """ loop = self.parse_loop(ops) - vopt = self.extend_packset(loop,1) - assert len(vopt.dependency_graph.memory_refs) == 2 + vopt, graph = self.extend_packset(loop,1) + assert len(graph.memory_refs) == 2 self.assert_independent(5,10) assert len(vopt.packset.packs) == 2 - self.assert_packset_empty(vopt.packset, len(loop.operations), + self.assert_packset_empty(vopt.packset, + len(loop.operations), [(5,10), (4,9)]) def test_packset_extend_load_modify_store(self): ops = """ [p0,i0] - guard_early_exit() [] i1 = int_add(i0, 1) i2 = int_le(i1, 16) guard_true(i2) [p0, i0] - i3 = getarrayitem_raw(p0, i1, descr=chararraydescr) + i3 = getarrayitem_raw_i(p0, i1, descr=chararraydescr) i4 = int_mul(i3, 2) setarrayitem_raw(p0, i1, i4, descr=chararraydescr) jump(p0,i1) """ loop = self.parse_loop(ops) - vopt = self.extend_packset(loop,1) - assert len(vopt.dependency_graph.memory_refs) == 4 + vopt, graph = self.extend_packset(loop,1) + assert len(graph.memory_refs) == 4 self.assert_independent(4,10) self.assert_independent(5,11) self.assert_independent(6,12) @@ -763,15 +794,18 @@ ('int',2, [(0,(2,4)),(1,(6,8))]), ('singlefloat',1,[(0,(2,4,6,8))])]) def test_packset_combine_simple(self,descr,packs,packidx): + suffix = '_i' + if 'float' in descr: + suffix = '_f' ops = """ [p0,i0] - i3 = getarrayitem_raw(p0, i0, 
descr={descr}arraydescr) + i3 = getarrayitem_raw{suffix}(p0, i0, descr={descr}arraydescr) i1 = int_add(i0,1) jump(p0,i1) - """.format(descr=descr) + """.format(descr=descr,suffix=suffix) loop = self.parse_loop(ops) - vopt = self.combine_packset(loop,3) - assert len(vopt.dependency_graph.memory_refs) == 4 + vopt, graph = self.combine_packset(loop,3) + assert len(graph.memory_refs) == 4 assert len(vopt.packset.packs) == packs for i,t in packidx: self.assert_pack(vopt.packset.packs[i], t) @@ -832,7 +866,6 @@ def test_packset_vector_operation(self, op, descr, stride): ops = """ [p0,p1,p2,i0] - guard_early_exit() [] i1 = int_add(i0, {stride}) i10 = int_le(i1, 128) guard_true(i10) [] @@ -864,7 +897,6 @@ def test_schedule_vector_operation(self, op, descr, stride): ops = """ [p0,p1,p2,i0] # 0 - guard_early_exit() [] i10 = int_le(i0, 128) # 1, 8, 15, 22 guard_true(i10) [p0,p1,p2,i0] # 2, 9, 16, 23 i2 = getarrayitem_raw(p0, i0, descr={descr}arraydescr) # 3, 10, 17, 24 @@ -882,8 +914,8 @@ i11 = int_le(i1, 128) guard_true(i11) [] i12 = int_add(i1, {stride}) - v1 = vec_getarrayitem_raw(p0, i0, 2, descr={descr}arraydescr) - v2 = vec_getarrayitem_raw(p1, i0, 2, descr={descr}arraydescr) + v1 = vec_getarrayitem_raw(p0, i0, descr={descr}arraydescr) + v2 = vec_getarrayitem_raw(p1, i0, descr={descr}arraydescr) v3 = {op}(v1,v2) vec_setarrayitem_raw(p2, i0, v3, descr={descr}arraydescr) jump(p0,p1,p2,i12) @@ -895,7 +927,6 @@ def test_vschedule_trace_1(self): ops = """ [i0, i1, i2, i3, i4] - guard_early_exit() [] i6 = int_mul(i0, 8) i7 = raw_load(i2, i6, descr=arraydescr) i8 = raw_load(i3, i6, descr=arraydescr) @@ -928,7 +959,6 @@ def test_collapse_index_guard_1(self): ops = """ [p0,i0] - guard_early_exit() [p0,i0] i1 = getarrayitem_raw(p0, i0, descr=chararraydescr) i2 = int_add(i0, 1) i3 = int_lt(i2, 102) @@ -949,7 +979,7 @@ {dead_code} i500 = int_add(i0, 16) i501 = int_lt(i2, 102) - i1 = vec_getarrayitem_raw(p0, i0, 16, descr=chararraydescr) + v10[16xi8] = vec_getarrayitem_raw(p0, i0, 
descr=chararraydescr) jump(p0,i2) """.format(dead_code=dead_code) vopt = self.schedule(self.parse_loop(ops),15,with_guard_opt=True) @@ -958,7 +988,6 @@ def test_too_small_vector(self): ops = """ [p0,i0] - guard_early_exit() [p0,i0] i1 = getarrayitem_raw(p0, 0, descr=chararraydescr) # constant index i2 = getarrayitem_raw(p0, 1, descr=chararraydescr) # constant index i4 = int_add(i1, i2) @@ -976,7 +1005,6 @@ def test_constant_expansion(self): ops = """ [p0,i0] - guard_early_exit() [p0,i0] i1 = getarrayitem_raw(p0, i0, descr=floatarraydescr) i4 = int_sub(i1, 42) i3 = int_add(i0,1) @@ -987,8 +1015,8 @@ opt=""" [p0,i0] label(p0,i0) - v3 = vec_int_expand(42, 2) - label(p0,i0,v3) + v3[2xf64] = vec_expand_f(42.0) + label(p0,i0,v3[2xf64]) i20 = int_add(i0, 1) i30 = int_lt(i20, 10) i2 = int_add(i0, 2) @@ -996,9 +1024,9 @@ guard_true(i3) [p0,i0] i4 = int_add(i0, 2) i5 = int_lt(i2, 10) - v1 = vec_getarrayitem_raw(p0, i0, 2, descr=floatarraydescr) - v2 = vec_int_sub(v1, v3) - jump(p0,i2,v3) + v1[2xf64] = vec_getarrayitem_raw(p0, i0, descr=floatarraydescr) + v2[2xf64] = vec_int_sub(v1[2xf64], v3[2xf64]) + jump(p0,i2,v3[2xf64]) """ vopt = self.vectorize(self.parse_loop(ops),1) self.assert_equal(vopt.loop, self.parse_loop(opt,add_label=False)) @@ -1006,7 +1034,6 @@ def test_variable_expansion(self): ops = """ [p0,i0,f3] - guard_early_exit() [p0,i0] f1 = getarrayitem_raw(p0, i0, descr=floatarraydescr) f4 = int_add(f1, f3) i3 = int_add(i0,1) @@ -1017,8 +1044,8 @@ opt=""" [p0,i0,f3] label(p0,i0,f3) - v3 = vec_float_expand(f3,2) - label(p0,i0,f3,v3) + v3[2xf64] = vec_expand_f(f3) + label(p0,i0,f3,v3[2xf64]) i20 = int_add(i0, 1) i30 = int_lt(i20, 10) i2 = int_add(i0, 2) @@ -1026,9 +1053,9 @@ guard_true(i3) [p0,i0,f3] i4 = int_add(i0, 2) i5 = int_lt(i2, 10) - v1 = vec_getarrayitem_raw(p0, i0, 2, descr=floatarraydescr) - v2 = vec_int_add(v1, v3) - jump(p0,i2,f3,v3) + v1[2xf64] = vec_getarrayitem_raw(p0, i0, descr=floatarraydescr) + v2[2xf64] = vec_int_add(v1[2xf64], v3[2xf64]) + 
jump(p0,i2,f3,v3[2xf64]) """ vopt = self.vectorize(self.parse_loop(ops),1) self.assert_equal(vopt.loop, self.parse_loop(opt, add_label=False)) @@ -1036,7 +1063,6 @@ def test_accumulate_basic(self): trace = """ [p0, i0, f0] - guard_early_exit() [p0, i0, f0] f1 = raw_load(p0, i0, descr=floatarraydescr) f2 = float_add(f0, f1) i1 = int_add(i0, 8) @@ -1063,7 +1089,6 @@ def test_element_f45_in_guard_failargs(self): ops = """ [p36, i28, p9, i37, p14, f34, p12, p38, f35, p39, i40, i41, p42, i43, i44, i21, i4, i0, i18] - guard_early_exit() [p38, p12, p9, p14, p39, i37, i44, f35, i40, p42, i43, f34, i28, p36, i41] f45 = raw_load(i21, i44, descr=floatarraydescr) guard_not_invalidated() [p38, p12, p9, p14, f45, p39, i37, i44, f35, i40, p42, i43, None, i28, p36, i41] i46 = int_add(i44, 8) @@ -1107,7 +1132,6 @@ def test_shrink_vector_size(self): ops = """ [p0,p1,i1] - guard_early_exit() [] f1 = getarrayitem_raw(p0, i1, descr=floatarraydescr) i2 = cast_float_to_singlefloat(f1) setarrayitem_raw(p1, i1, i2, descr=singlefloatarraydescr) @@ -1143,7 +1167,6 @@ def test_castup_arith_castdown(self): ops = """ [p0,p1,p2,i0,i4] - guard_early_exit() [] i10 = raw_load(p0, i0, descr=singlefloatarraydescr) i1 = int_add(i0, 4) i11 = raw_load(p1, i1, descr=singlefloatarraydescr) @@ -1196,7 +1219,6 @@ def test_truediv_abs_neg_float(self): ops = """ [f9,p10,i11,p4,i12,p2,p5,p13,i14,p7,i15,p8,i16,f17,i18,i19] - guard_early_exit() [p8, p7, p5, p4, p2, f9, i12, i11, p10, i15, i14, p13] f20 = raw_load(i16, i12, descr=floatarraydescr) guard_not_invalidated() [p8, p7, p5, p4, p2, f20, None, i12, i11, p10, i15, i14, p13] i23 = int_add(i12, 8) @@ -1216,7 +1238,6 @@ def test_axis_sum(self): trace = """ [i1, p10, i11, p8, i12, p3, p4, p13, i14, i15, p6, p9, i16, i17, i18, i19, i20, i21, i22, i23] - guard_early_exit() [i1, p9, p8, p6, p4, p3, i11, i15, p13, i12, i14, p10] f24 = raw_load(i16, i12, descr=floatarraydescr) guard_not_invalidated() [i1, p9, p8, p6, p4, p3, f24, i11, i15, p13, i12, i14, p10] i26 = 
int_add(i12, 8) @@ -1246,7 +1267,6 @@ def test_cast_1(self): trace = """ [i9, i10, p2, p11, i12, i13, p4, p5, p14, i15, p8, i16, p17, i18, i19, i20, i21, i22, i23] - guard_early_exit() [p8, p5, p4, p2, p17, i13, i12, i10, i19, p14, p11, i18, i15, i16, i9] i24 = raw_load(i20, i16, descr=singlefloatarraydescr) guard_not_invalidated() [p8, p5, p4, p2, i24, p17, i13, i12, i10, i19, p14, p11, i18, i15, i16, None] i27 = int_add(i16, 4) @@ -1269,7 +1289,6 @@ def test_all_guard(self): trace = """ [p0, p3, i4, i5, i6, i7] - guard_early_exit() [p0, p3, i5, i4] f8 = raw_load(i6, i5, descr=floatarraydescr) guard_not_invalidated() [p0, f8, p3, i5, i4] i9 = cast_float_to_int(f8) @@ -1287,7 +1306,6 @@ def test_max(self): trace = """ [p3, i4, p2, i5, f6, i7, i8] - guard_early_exit() [p2, f6, i4, i5, p3] f9 = raw_load(i7, i5, descr=floatarraydescr) guard_not_invalidated() [p2, f9, f6, i4, i5, p3] i10 = float_ge(f6, f9) @@ -1307,7 +1325,6 @@ def test_abc(self): trace=""" [p0, p1, p5, p6, p7, p12, p13, i14, i15, i16, i17, i18, i19, i20] - guard_early_exit() [] debug_merge_point(0, 0, '<code object <module>. file '/home/rich/proj/da/thesis/bench/user1.py'. line 2> #117 LOAD_NAME') guard_not_invalidated(descr=<ResumeGuardNotInvalidated object at 0x7fc657d7be20>) [p1, p0, p5, p6, p7, p12, p13] debug_merge_point(0, 0, '<code object <module>. file '/home/rich/proj/da/thesis/bench/user1.py'. 
line 2> #120 LOAD_CONST') @@ -1354,7 +1371,6 @@ def test_bug1(self): trace=""" [p0, p1, p6, p7, p11, i83, f57, f61, f65, f70, f78, f81, i48, i56, p46] - guard_early_exit(descr=<Guard0x7fa392d5c1a0>) [p1, p0, p6, p7, p11, f81, f78, f70, f65, f61, f57, i83] guard_not_invalidated(descr=<Guard0x7fa392d5c200>) [p1, p0, p6, p7, p11, f81, f78, f70, f65, f61, f57, i83] i91 = int_lt(i83, i48) guard_true(i91, descr=<Guard0x7fa392d5c260>) [p1, p0, p6, p7, p11, i48, f81, f78, f70, f65, f61, f57, i83] @@ -1389,7 +1405,6 @@ def test_1(self): trace = """ [p0, p1, p6, p7, i13, p14, p15] - guard_early_exit(descr=<ResumeAtLoopHeaderDescr object at 0x7f89c54cdbe0>) [p1, p0, p6, p7, i13] guard_not_invalidated(descr=<ResumeGuardNotInvalidated object at 0x7f89c54cdc40>) [p1, p0, p6, p7, i13] i17 = int_lt(i13, 10000) guard_true(i17, descr=<ResumeGuardTrueDescr object at 0x7f89c54cdca0>) [p1, p0, p6, p7, i13] diff --git a/rpython/jit/metainterp/optimizeopt/vector.py b/rpython/jit/metainterp/optimizeopt/vector.py --- a/rpython/jit/metainterp/optimizeopt/vector.py +++ b/rpython/jit/metainterp/optimizeopt/vector.py @@ -176,7 +176,6 @@ if vsize == 0 or byte_count == 0 or loop.label.getopnum() != rop.LABEL: # stop, there is no chance to vectorize this trace # we cannot optimize normal traces (if there is no label) - import pdb; pdb. set_trace() raise NotAVectorizeableLoop() # find index guards and move to the earliest position @@ -188,8 +187,6 @@ # unroll self.unroll_count = self.get_unroll_count(vsize) self.unroll_loop_iterations(loop, self.unroll_count) - loop.operations = self.get_newoperations() - self.clear_newoperations(); # vectorize graph = DependencyGraph(loop) @@ -210,8 +207,6 @@ def unroll_loop_iterations(self, loop, unroll_count): """ Unroll the loop X times. 
unroll_count + 1 = unroll_factor """ numops = len(loop.operations) - label_op = loop.label - jump_op = loop.jump # use the target token of the label #target_token = label_op.getdescr() #if not we_are_translated(): @@ -223,33 +218,32 @@ # jump_op.setdescr(target_token) #assert jump_op.is_final() - self.emit_unrolled_operation(label_op) + #self.emit_unrolled_operation(label_op) + + #for i in range(0,numops): + # op = loop.operations[i].copy() + # if op.is_guard(): + # assert isinstance(op, GuardResOp) + # failargs = renamer.rename_failargs(op, clone=True) + # snapshot = renamer.rename_rd_snapshot(op.rd_snapshot, clone=True) + # op.setfailargs(failargs) + # op.rd_snapshot = snapshot + # operations.append(op) + # self.emit_unrolled_operation(op) renamer = Renamer() - operations = [] - for i in range(1,numops-1): - op = loop.operations[i].copy() - if op.is_guard(): - assert isinstance(op, GuardResOp) - failargs = renamer.rename_failargs(op, clone=True) - snapshot = renamer.rename_rd_snapshot(op.rd_snapshot, clone=True) - op.setfailargs(failargs) - op.rd_snapshot = snapshot - operations.append(op) - self.emit_unrolled_operation(op) - + operations = loop.operations + unrolled = [] prohibit_opnums = (rop.GUARD_FUTURE_CONDITION, - rop.GUARD_EARLY_EXIT, rop.GUARD_NOT_INVALIDATED) - - orig_jump_args = jump_op.getarglist()[:] + orig_jump_args = loop.jump.getarglist()[:] # it is assumed that #label_args == #jump_args label_arg_count = len(orig_jump_args) for u in range(unroll_count): # fill the map with the renaming boxes. 
keys are boxes from the label for i in range(label_arg_count): - la = label_op.getarg(i) - ja = jump_op.getarg(i) + la = loop.label.getarg(i) + ja = loop.jump.getarg(i) ja = renamer.rename_box(ja) if la != ja: renamer.start_renaming(la, ja) @@ -284,17 +278,18 @@ renamer.rename_failargs(copied_op, clone=True) copied_op.setfailargs(renamed_failargs) # - self.emit_unrolled_operation(copied_op) + unrolled.append(copied_op) # the jump arguments have been changed # if label(iX) ... jump(i(X+1)) is called, at the next unrolled loop # must look like this: label(i(X+1)) ... jump(i(X+2)) - args = jump_op.getarglist() + args = loop.jump.getarglist() for i, arg in enumerate(args): value = renamer.rename_box(arg) - jump_op.setarg(i, value) + loop.jump.setarg(i, value) # - self.emit_unrolled_operation(jump_op) + #self.emit_unrolled_operation(jump_op) + loop.operations = operations + unrolled def linear_find_smallest_type(self, loop): # O(#operations) @@ -456,14 +451,7 @@ fail = True check[left] = None check[right] = None - accum = pack.accum - if accum: - self.packset.accum_vars[accum.var] = accum.pos - - print " %dx %s " % (len(pack.operations), - pack.operations[0].op.getopname()) - if accum: - print " accumulates!" + print " ", pack if fail: assert False @@ -537,9 +525,9 @@ modify_later.append(prev_node) else: for path in prev_node.iterate_paths(None, backwards=True, blacklist=True): - if not path.is_always_pure(exclude_first=True): - path.set_schedule_priority(90) + if not path.is_always_pure(): valid = False + else: if path.last() in zero_deps: del zero_deps[path.last()] if not valid: @@ -559,34 +547,25 @@ for node in zero_deps.keys(): earlyexit.edge_to(node) - # TODO self.relax_guard_to(guard_node, ee_guard_node) + self.mark_guard(guard_node, loop) if one_valid: return graph return None - def relax_guard_to(self, guard_node, other_node): - """ Relaxes a guard operation to an earlier guard. """ - # clone this operation object. 
if the vectorizer is - # not able to relax guards, it won't leave behind a modified operation - tgt_op = guard_node.getoperation().clone() - guard_node.op = tgt_op - - op = other_node.getoperation() - assert isinstance(tgt_op, GuardResOp) + def mark_guard(self, node, loop): + """ Marks this guard as an early exit! """ + op = node.getoperation() assert isinstance(op, GuardResOp) - olddescr = op.getdescr() descr = None - guard_true_false = tgt_op.getopnum() in (rop.GUARD_TRUE, rop.GUARD_FALSE) - if guard_true_false: + if op.getopnum() in (rop.GUARD_TRUE, rop.GUARD_FALSE): descr = CompileLoopVersionDescr() else: descr = ResumeAtLoopHeaderDescr() - if olddescr: - descr.copy_all_attributes_from(olddescr) + if op.getdescr(): + descr.copy_all_attributes_from(op.getdescr()) # - tgt_op.setdescr(descr) - tgt_op.setfailargs(op.getfailargs()[:]) - + op.setdescr(descr) + op.setfailargs(loop.inputargs) class CostModel(object): """ Utility to estimate the savings for the new trace loop. @@ -687,7 +666,6 @@ if forward and origin_pack.is_accumulating(): # in this case the splitted accumulator must # be combined. This case is not supported - import pdb; pdb. set_trace() raise NotAVectorizeableLoop() # if self.contains_pair(lnode, rnode): diff --git a/rpython/jit/metainterp/optimizeopt/version.py b/rpython/jit/metainterp/optimizeopt/version.py --- a/rpython/jit/metainterp/optimizeopt/version.py +++ b/rpython/jit/metainterp/optimizeopt/version.py @@ -113,26 +113,26 @@ jump.setdescr(token) -def index_of_first(opnum, operations, pass_by=0): - """ returns the position of the first operation matching the opnum. - Or -1 if non is found - """ - for i,op in enumerate(operations): - if op.getopnum() == opnum: - if pass_by == 0: - return i - else: - pass_by -= 1 - return -1 +#def index_of_first(opnum, operations, pass_by=0): +# """ returns the position of the first operation matching the opnum. 
+# Or -1 if non is found +# """ +# for i,op in enumerate(operations): +# if op.getopnum() == opnum: +# if pass_by == 0: +# return i +# else: +# pass_by -= 1 +# return -1 +# +#def find_first_index(self, opnum, pass_by=0): +# """ return the first index of the operation having the same opnum or -1 """ +# return index_of_first(opnum, self.operations, pass_by) +# +#def find_first(self, opnum, pass_by=0): +# index = self.find_first_index(opnum, pass_by) +# if index != -1: +# return self.operations[index] +# return None -def find_first_index(self, opnum, pass_by=0): - """ return the first index of the operation having the same opnum or -1 """ - return index_of_first(opnum, self.operations, pass_by) -def find_first(self, opnum, pass_by=0): - index = self.find_first_index(opnum, pass_by) - if index != -1: - return self.operations[index] - return None - - _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit