Author: Richard Plangger <r...@pasra.at> Branch: vecopt2 Changeset: r77113:e92fd08ff586 Date: 2015-04-13 11:52 +0200 http://bitbucket.org/pypy/pypy/changeset/e92fd08ff586/
Log: relaxing guards dependency works for the first simple case diff --git a/rpython/jit/metainterp/optimizeopt/dependency.py b/rpython/jit/metainterp/optimizeopt/dependency.py --- a/rpython/jit/metainterp/optimizeopt/dependency.py +++ b/rpython/jit/metainterp/optimizeopt/dependency.py @@ -32,8 +32,32 @@ def __init__(self,path): self.path = path - def walk(self, idx): - self.path.append(idx) + def second(self): + if len(self.path) <= 1: + return None + return self.path[1] + + def last_but_one(self): + if len(self.path) < 2: + return None + return self.path[len(self.path)-2] + + def has_no_side_effects(self, exclude_first=False, exclude_last=False): + last = len(self.path)-1 + count = len(self.path) + i = 0 + if exclude_first: + i += 1 + if exclude_last: + count -= 1 + while i < count: + if not self.path[i].op.has_no_side_effect(): + return False + i += 1 + return True + + def walk(self, node): + self.path.append(node) def clone(self): return Path(self.path[:]) @@ -60,7 +84,7 @@ def getopname(self): return self.op.getopname() - def edge_to(self, to, arg, label=None): + def edge_to(self, to, arg=None, label=None): assert self != to dep = self.depends_on(to) if not dep: @@ -91,8 +115,8 @@ return None def clear_dependencies(self): - self.adjacent_list.clear() - self.adjacent_list_back.clear() + self.adjacent_list = [] + self.adjacent_list_back = [] def is_guard_early_exit(self): return self.op.getopnum() == rop.GUARD_NO_EARLY_EXIT @@ -134,7 +158,7 @@ def provides(self): return self.adjacent_list - def depends_count(self, idx): + def depends_count(self): return len(self.adjacent_list_back) def depends(self): @@ -178,24 +202,40 @@ worklist.append(dep.to) return True - def iterate_paths_backward(self, ai, bi): - if ai == bi: + def iterate_paths(self, to, backwards=False): + """ yield all nodes from self leading to 'to' """ + if self == to: return - if ai > bi: - ai, bi = bi, ai - worklist = [(Path([bi]),bi)] + worklist = [(Path([self]),self)] while len(worklist) > 0: - path,idx = worklist.pop() - for dep in self.depends(idx): - if ai > dep.idx_from or dep.points_backward(): - # this points above ai (thus unrelevant) - continue + path,node = worklist.pop() + if backwards: + iterdir = node.depends() + else: + iterdir = node.provides() + for dep in iterdir: cloned_path = path.clone() - cloned_path.walk(dep.idx_from) - if dep.idx_from == ai: + cloned_path.walk(dep.to) + if dep.to == to: yield cloned_path else: - worklist.append((cloned_path,dep.idx_from)) + worklist.append((cloned_path,dep.to)) + + def remove_edge_to(self, node): + i = 0 + while i < len(self.adjacent_list): + dep = self.adjacent_list[i] + if dep.to == node: + del self.adjacent_list[i] + break + i += 1 + i = 0 + while i < len(node.adjacent_list_back): + dep = node.adjacent_list_back[i] + if dep.to == self: + del node.adjacent_list_back[i] + break + i += 1 def getedge_to(self, other): for dep in self.adjacent_list: @@ -203,18 +243,6 @@ return dep return None - def i_remove_dependency(self, idx_at, idx_to): - at = self.nodes[idx_at] - to = self.nodes[idx_to] - self.remove_dependency(at, to) - def remove_dependency(self, at, to): - """ Removes a all dependencies that point to 'to' """ - self.adjacent_list[at] = \ - [d for d in self.adjacent_list[at] if d.to != to] - self.adjacent_list[to] = \ - [d for d in self.adjacent_list[to] if d.at != at] - - return args def __repr__(self): return "Node(opidx: %d)"%self.opidx @@ -222,6 +250,8 @@ return not self.__eq__(other) def __eq__(self, other): + if other is None: + return False assert isinstance(other, Node) return self.opidx == other.opidx @@ -347,7 +377,7 @@ def __init__(self, operations): self.nodes = [ Node(op,i) for i,op in enumerate(operations) ] self.memory_refs = {} - self.schedulable_nodes = [0] # label is always scheduleable + self.schedulable_nodes = [] self.index_vars = {} self.guards = [] self.build_dependencies() @@ -365,13 +395,15 @@ """ tracker = DefTracker(self) # + label_pos = 0 + jump_pos = len(self.nodes)-1 intformod = IntegralForwardModification(self.memory_refs, self.index_vars) # pass 1 for i,node in enumerate(self.nodes): op = node.op # the label operation defines all operations at the # beginning of the loop - if op.getopnum() == rop.LABEL: + if op.getopnum() == rop.LABEL and i != jump_pos: # TODO is it valid that a label occurs at the end of a trace? ee_node = self.nodes[i+1] if ee_node.is_guard_early_exit(): @@ -398,16 +430,18 @@ for guard_node in self.guards: self._build_guard_dependencies(guard_node, op.getopnum(), tracker) # pass 3 find schedulable nodes - jump_pos = len(self.nodes)-1 jump_node = self.nodes[jump_pos] + label_node = self.nodes[label_pos] + self.schedulable_nodes.append(label_node) for node in self.nodes: - if node.dependency_count() == 0: - self.schedulable_nodes.append(node.opidx) - # every leaf instruction points to the jump_op. in theory every instruction - # points to jump_op. this forces the jump/finish op to be the last operation if node != jump_node: + if node.dependency_count() == 0: + self.schedulable_nodes.append(node) + # every leaf instruction points to the jump_op. in theory every instruction + # points to jump_op. this forces the jump/finish op to be the last operation if node.provides_count() == 0: node.edge_to(jump_node, None, label='jump') + print "\n\neee", self.schedulable_nodes def _build_guard_dependencies(self, guard_node, guard_opnum, tracker): if guard_opnum >= rop.GUARD_NOT_INVALIDATED: @@ -557,10 +591,10 @@ self.schedulable_nodes = self.graph.schedulable_nodes self.sched_data = sched_data - def has_more_to_schedule(self): + def has_more(self): return len(self.schedulable_nodes) > 0 - def next_schedule_index(self): + def next(self): return self.schedulable_nodes[0] def schedulable(self, indices): @@ -578,37 +612,24 @@ def schedule_all(self, opindices): while len(opindices) > 0: - print "sched" opidx = opindices.pop() for i,node in enumerate(self.schedulable_nodes): if node == opidx: + self.schedule(i) break - else: - i = -1 - if i != -1: - self.schedule(i) def schedule(self, index): node = self.schedulable_nodes[index] del self.schedulable_nodes[index] to_del = [] - adj_list = self.graph.adjacent_list[node] - for dep in adj_list: - self.graph.remove_dependency_by_index(node, dep.idx_to) - self.graph.remove_dependency_by_index(dep.idx_to, node) - print "remove", node, "<=>", dep.idx_to - if self.is_schedulable(dep.idx_to): - print "sched", dep.idx_to - self.schedulable_nodes.append(dep.idx_to) - # - # TODO for dep in self.graph.provides(node): - # candidate = dep.idx_to + print " schedule", node.getoperation() + for dep in node.provides()[:]: + node.remove_edge_to(dep.to) + print " >=X=>", node, dep.to, "count",dep.to.depends_count() + if dep.to.depends_count() == 0: + self.schedulable_nodes.append(dep.to) node.clear_dependencies() - def is_schedulable(self, idx): - print "is sched", idx, "count:", self.graph.depends_count(idx), self.graph.adjacent_list[idx] - return self.graph.depends_count(idx) == 0 - class IntegralForwardModification(object): """ Calculates integral modifications on an integer box. """ def __init__(self, memory_refs, index_vars): diff --git a/rpython/jit/metainterp/optimizeopt/test/test_dependency.py b/rpython/jit/metainterp/optimizeopt/test/test_dependency.py --- a/rpython/jit/metainterp/optimizeopt/test/test_dependency.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_dependency.py @@ -121,7 +121,6 @@ node = self.last_graph.getnode(idx) return self.last_graph.memory_refs[node] - class BaseTestDependencyGraph(DependencyBaseTest): def test_dependency_empty(self): ops = """ @@ -298,22 +297,10 @@ self.assert_dependent(1,2) self.assert_dependent(0,3) - def test_setarrayitem_depend_with_no_memref_info(self): - ops=""" - [p0, i1] # 0: 1,2,3?,4? - setarrayitem_raw(p0, i1, 1, descr=floatarraydescr) # 1: 4? - i2 = int_add(i1,1) # 2: 3 - setarrayitem_raw(p0, i2, 2, descr=floatarraydescr) # 3: 4 - jump(p0, i1) # 4: - """ - self.assert_dependencies(ops, full_check=True) - self.assert_independent(1,2) - self.assert_independent(1,3) - def test_setarrayitem_dont_depend_with_memref_info(self): ops=""" [p0, i1] # 0: 1,2,3?,4? - setarrayitem_raw(p0, i1, 1, descr=chararraydescr) # 1: 3?,4? + setarrayitem_raw(p0, i1, 1, descr=chararraydescr) # 1: 4 i2 = int_add(i1,1) # 2: 3 setarrayitem_raw(p0, i2, 2, descr=chararraydescr) # 3: 4 jump(p0, i1) # 4: diff --git a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py --- a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py @@ -809,7 +809,6 @@ """.format(op=op,descr=descr,stride=stride) loop = self.parse_loop(ops) vopt = self.combine_packset(loop,3) - self.debug_print_operations(loop) assert len(vopt.dependency_graph.memory_refs) == 12 if len(vopt.packset.packs) != 4: for pack in vopt.packset.packs: @@ -905,6 +904,7 @@ def test_123(self): ops = """ [i0, i1, i2, i3, i4] + guard_no_early_exit() [] debug_merge_point(0, 0, '1') i6 = int_mul(i0, 8) i7 = raw_load(i2, i6, descr=intarraydescr) @@ -915,9 +915,10 @@ i12 = int_lt(i11, i1) guard_true(i12) [i4, i3, i2, i1, i11] debug_merge_point(0, 0, '2') - label(i11, i1, i2, i3, i4) + jump(i11, i1, i2, i3, i4) """ vopt = self.schedule(self.parse_loop(ops),1) + self.debug_print_operations(vopt.loop) def test_schedule_vectorized_trace_1(self): ops = """ @@ -937,6 +938,5 @@ vopt = self.schedule(self.parse_loop(ops),1) self.debug_print_operations(vopt.loop) - class TestLLtype(BaseTestVectorize, LLtypeMixin): pass diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py b/rpython/jit/metainterp/optimizeopt/vectorize.py --- a/rpython/jit/metainterp/optimizeopt/vectorize.py +++ b/rpython/jit/metainterp/optimizeopt/vectorize.py @@ -189,8 +189,6 @@ copied_op.setfailargs(args) # self.emit_unrolled_operation(copied_op) - #self.vec_info.index = len(self._newoperations)-1 - #self.vec_info.inspect_operation(copied_op) # the jump arguments have been changed # if label(iX) ... jump(i(X+1)) is called, at the next unrolled loop @@ -248,7 +246,7 @@ def build_dependency_graph(self): self.dependency_graph = DependencyGraph(self.loop.operations) - #self.relax_guard_dependencies() + self.relax_guard_dependencies() def find_adjacent_memory_refs(self): """ the pre pass already builds a hash of memory references and the @@ -279,7 +277,6 @@ self.packset.add_pair(node_a, node_b) def extend_packset(self): - print "extend_packset" pack_count = self.packset.pack_count() while True: for pack in self.packset.packs: @@ -348,68 +345,67 @@ def schedule(self): self.clear_newoperations() scheduler = Scheduler(self.dependency_graph, VecScheduleData()) - while scheduler.has_more_to_schedule(): - candidate = scheduler.next_to_schedule() - pack = self.packset.pack_for_operation(candidate) - if pack: - self._schedule_pack(scheduler, pack) + print "scheduling loop" + while scheduler.has_more(): + candidate = scheduler.next() + print " candidate", candidate + if candidate.pack: + pack = candidate.pack + if scheduler.schedulable(pack.operations): + vop = scheduler.sched_data.as_vector_operation(pack) + self.emit_operation(vop) + scheduler.schedule_all(pack.operations) + else: + scheduler.schedule_later(0) else: self.emit_operation(candidate.getoperation()) scheduler.schedule(0) self.loop.operations = self._newoperations[:] - def _schedule_pack(self, scheduler, pack): - opindices = [ e.opidx for e in pack.operations ] - if scheduler.schedulable(opindices): - vop = scheduler.sched_data \ - .as_vector_operation(pack, self.loop.operations) - self.emit_operation(vop) - scheduler.schedule_all(opindices) - else: - scheduler.schedule_later(0) - def relax_guard_dependencies(self): early_exit_idx = 1 - operations = self.loop.operations - assert operations[early_exit_idx].getopnum() == \ - rop.GUARD_NO_EARLY_EXIT - target_guard = operations[early_exit_idx] - for guard_idx in self.dependency_graph.guards: - if guard_idx == early_exit_idx: + label_idx = 0 + label = self.dependency_graph.getnode(label_idx) + ee_guard = self.dependency_graph.getnode(early_exit_idx) + if not ee_guard.getopnum() == rop.GUARD_NO_EARLY_EXIT: + return # cannot relax + + for guard_node in self.dependency_graph.guards: + if guard_node == ee_guard: continue - guard = operations[guard_idx] - if guard.getopnum() not in (rop.GUARD_TRUE,rop.GUARD_FALSE): + if guard_node.getopnum() not in (rop.GUARD_TRUE,rop.GUARD_FALSE): continue - self.dependency_graph.edge(early_exit_idx, guard_idx, early_exit_idx, label='EE') - print "put", guard_idx, "=>", early_exit_idx del_deps = [] - for path in self.dependency_graph.iterate_paths_backward(guard_idx, early_exit_idx): - op_idx = path.path[1] - print "path", path.path - op = operations[op_idx] - if fail_args_break_dependency(guard, guard_idx, target_guard, early_exit_idx, op, op_idx): - print " +>+>==> break", op_idx, "=>", guard_idx - del_deps.append(op_idx) - for dep_idx in del_deps: - self.dependency_graph.remove_dependency_by_index(dep_idx, guard_idx) + pullup = [] + iterb = guard_node.iterate_paths(ee_guard, True) + last_prev_node = None + for path in iterb: + prev_node = path.second() + if fail_args_break_dependency(guard_node, prev_node, ee_guard): + if prev_node == last_prev_node: + continue + print ">=XXX=> ", prev_node, "=>", guard_node + del_deps.append((prev_node,guard_node)) + else: + pullup.append(path) + last_prev_node = prev_node + for a,b in del_deps: + a.remove_edge_to(b) + for candidate in pullup: + lbo = candidate.last_but_one() + if candidate.has_no_side_effects(exclude_first=True, exclude_last=True): + ee_guard.remove_edge_to(lbo) + label.edge_to(lbo, label='pullup') + guard_node.edge_to(ee_guard, label='pullup') + label.remove_edge_to(ee_guard) - del_deps = [] - for dep in self.dependency_graph.provides(early_exit_idx): - del_deps.append(dep.idx_to) - for dep_idx in del_deps: - self.dependency_graph.remove_dependency_by_index(1, dep_idx) - self.dependency_graph.edge(dep_idx, 0, dep_idx) - last_idx = len(operations) - 1 - self.dependency_graph.remove_dependency_by_index(0,1) - self.dependency_graph.edge(last_idx, early_exit_idx, last_idx) +def fail_args_break_dependency(guard, prev_op, target_guard): + failargs = set(guard.getoperation().getfailargs()) + new_failargs = set(target_guard.getoperation().getfailargs()) -def fail_args_break_dependency(guard, guard_idx, target_guard, target_guard_idx, op, op_idx): - failargs = set(guard.getfailargs()) - new_failargs = set(target_guard.getfailargs()) - - print " args:", [op.result] + op.getarglist()[:], " &&& ", failargs, " !!! ", new_failargs - if op.is_array_op(): + op = prev_op.getoperation() + if not op.has_no_side_effect(): return True if op.result is not None: arg = op.result @@ -420,28 +416,29 @@ if arg not in failargs or \ arg in failargs and arg in new_failargs: return False + # THINK about: increased index in fail arg, but normal index on arglist + # this might be an indicator for edge removal return True class VecScheduleData(SchedulerData): def __init__(self): self.box_to_vbox = {} - def as_vector_operation(self, pack, operations): + def as_vector_operation(self, pack): assert len(pack.operations) > 1 self.pack = pack - ops = [operations[w.opidx] for w in pack.operations] - op0 = operations[pack.operations[0].opidx] + op0 = pack.operations[0].getoperation() assert op0.vector != -1 args = op0.getarglist()[:] if op0.vector in (rop.VEC_RAW_LOAD, rop.VEC_RAW_STORE): args.append(ConstInt(0)) - vopt = ResOperation(op0.vector, args, + vop = ResOperation(op0.vector, args, op0.result, op0.getdescr()) - self._inspect_operation(vopt,ops) # op0 is for dispatch only + self._inspect_operation(vop) # op0 is for dispatch only #if op0.vector not in (rop.VEC_RAW_LOAD, rop.VEC_RAW_STORE): # op_count = len(pack.operations) # args.append(ConstInt(op_count)) - return vopt + return vop def _pack_vector_arg(self, vop, op, i, vbox): arg = op.getarg(i) @@ -463,11 +460,13 @@ return vbox bin_arith_trans = """ - def _vectorize_{name}(self, vop, ops): + def _vectorize_{name}(self, vop): vbox_arg_0 = None vbox_arg_1 = None vbox_result = None - for i, op in enumerate(ops): + ops = self.pack.operations + for i, node in enumerate(ops): + op = node.getoperation() vbox_arg_0 = self._pack_vector_arg(vop, op, 0, vbox_arg_0) vbox_arg_1 = self._pack_vector_arg(vop, op, 1, vbox_arg_1) vbox_result= self._pack_vector_result(vop, op, vbox_result) @@ -482,16 +481,20 @@ exec py.code.Source(bin_arith_trans.format(name='VEC_FLOAT_SUB')).compile() del bin_arith_trans - def _vectorize_VEC_RAW_LOAD(self, vop, ops): + def _vectorize_VEC_RAW_LOAD(self, vop): vbox_result = None - for i, op in enumerate(ops): + ops = self.pack.operations + for i, node in enumerate(ops): + op = node.getoperation() vbox_result= self._pack_vector_result(vop, op, vbox_result) vbox_result.item_count = len(ops) vop.setarg(vop.numargs()-1,ConstInt(len(ops))) - def _vectorize_VEC_RAW_STORE(self, vop, ops): + def _vectorize_VEC_RAW_STORE(self, vop): vbox_arg_2 = None - for i, op in enumerate(ops): + ops = self.pack.operations + for i, node in enumerate(ops): + op = node.getoperation() vbox_arg_2 = self._pack_vector_arg(vop, op, 2, vbox_arg_2) vbox_arg_2.item_count = len(ops) vop.setarg(vop.numargs()-1,ConstInt(len(ops))) @@ -499,28 +502,12 @@ VecScheduleData._inspect_operation = \ make_dispatcher_method(VecScheduleData, '_vectorize_') - def isomorphic(l_op, r_op): - """ Described in the paper ``Instruction-Isomorphism in Program Execution''. - I think this definition is to strict. TODO -> find another reference - For now it must have the same instruction type, the array parameter must be equal, - and it must be of the same type (both size in bytes and type of array). + """ Same instructions have the same operation name. + TODO what about parameters? """ if l_op.getopnum() == r_op.getopnum(): return True - # the stronger counterpart. TODO which structural equivalence is - # needed here? - #if l_op.getopnum() == r_op.getopnum() and \ - # l_op.getarg(0) == r_op.getarg(0): - # l_d = l_op.getdescr() - # r_d = r_op.getdescr() - # if l_d is not None and r_d is not None: - # if l_d.get_item_size_in_bytes() == r_d.get_item_size_in_bytes(): - # if l_d.getflag() == r_d.getflag(): - # return True - # elif l_d is None and r_d is None: - # return True - #return False class PackSet(object): @@ -536,7 +523,6 @@ return len(self.packs) def add_pair(self, l, r): - print "adds", l, r self.packs.append(Pair(l,r)) def can_be_packed(self, lnode, rnode): @@ -611,6 +597,8 @@ def __init__(self, ops): self.operations = ops self.savings = 0 + for node in self.operations: + node.pack = self def rightmost_match_leftmost(self, other): assert isinstance(other, Pack) _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit