Author: Richard Plangger <r...@pasra.at> Branch: vecopt Changeset: r77452:2cdfa2593741 Date: 2015-05-21 09:56 +0200 http://bitbucket.org/pypy/pypy/changeset/2cdfa2593741/
Log: refactored the dependency construction for guards guard relaxation is now simpler and faster saving backwards edge for each dependency object in the field backward last commit disabled vectorize for test_zjit (still not working) diff --git a/rpython/jit/backend/x86/test/test_vectorize.py b/rpython/jit/backend/x86/test/test_vectorize.py --- a/rpython/jit/backend/x86/test/test_vectorize.py +++ b/rpython/jit/backend/x86/test/test_vectorize.py @@ -47,7 +47,7 @@ asm.mc.AND_rr(ecx.value, edx.value) asm.mc.ADD(eax, ecx) - asm.mc.PSRLDQ_xi((xmm7.value, 4)) + asm.mc.PSRLDQ_xi(xmm7.value, 4) asm.mc.MOVDQ_rx(ecx.value, xmm7.value) asm.mc.AND_rr(ecx.value, edx.value) asm.mc.ADD(eax, ecx) diff --git a/rpython/jit/metainterp/optimizeopt/dependency.py b/rpython/jit/metainterp/optimizeopt/dependency.py --- a/rpython/jit/metainterp/optimizeopt/dependency.py +++ b/rpython/jit/metainterp/optimizeopt/dependency.py @@ -61,7 +61,7 @@ def set_schedule_priority(self, p): for node in self.path: - node.priority = p + node.setpriority(p) def walk(self, node): self.path.append(node) @@ -94,28 +94,12 @@ def getopname(self): return self.op.getopname() + def setpriority(self, value): + self.priority = value + def can_be_relaxed(self): return self.op.getopnum() in (rop.GUARD_TRUE, rop.GUARD_FALSE) - def getfailarg_set(self): - op = self.getoperation() - assert isinstance(op, GuardResOp) - args = {} - if op.getfailargs(): - for arg in op.getfailargs(): - args[arg] = None - return args.keys() - elif op.rd_snapshot: - ss = op.rd_snapshot - assert isinstance(ss, Snapshot) - while ss: - for box in ss.boxes: - args[box] = None - ss = ss.prev - - return args.keys() - - def relax_guard_to(self, guard): """ Relaxes a guard operation to an earlier guard. """ # clone this operation object. if the vectorizer is @@ -142,16 +126,17 @@ #if not we_are_translated(): tgt_op.setfailargs(op.getfailargs()) - def edge_to(self, to, arg=None, label=None): + def edge_to(self, to, arg=None, failarg=False, label=None): if self is to: print "debug: tried to put edge from: ", self.op, "to:", to.op return dep = self.depends_on(to) if not dep: #if force or self.independent(idx_from, idx_to): - dep = Dependency(self, to, arg) + dep = Dependency(self, to, arg, failarg) self.adjacent_list.append(dep) - dep_back = Dependency(to, self, arg) + dep_back = Dependency(to, self, arg, failarg) + dep.backward = dep_back to.adjacent_list_back.append(dep_back) if not we_are_translated(): if label is None: @@ -160,9 +145,14 @@ else: if not dep.because_of(arg): dep.add_dependency(self,to,arg) + # if a fail argument is overwritten by another normal + # dependency it will remove the failarg flag + if not (dep.is_failarg() and failarg): + dep.set_failarg(False) if not we_are_translated() and label is not None: _label = getattr(dep, 'label', '') dep.label = _label + ", " + label + return dep def clear_dependencies(self): self.adjacent_list = [] @@ -226,7 +216,7 @@ any other case. """ for edge in self.adjacent_list: - if edge.to == to: + if edge.to is to: return edge return None @@ -333,14 +323,15 @@ class Dependency(object): - def __init__(self, at, to, arg, flow=True): + def __init__(self, at, to, arg, failarg=False): assert at != to self.args = [] if arg is not None: self.add_dependency(at, to, arg) self.at = at self.to = to - self.flow = True + self.failarg = failarg + self.backward = None def because_of(self, var): for arg in self.args: @@ -371,11 +362,13 @@ def add_dependency(self, at, to, arg): self.args.append((at,arg)) - def set_flow(self, flow): - self.flow = flow + def set_failarg(self, value): + self.failarg = value + if self.backward: + self.backward.failarg = value - def get_flow(self): - return self.flow + def is_failarg(self): + return self.failarg def reverse_direction(self, ref): """ if the parameter index is the same as idx_to then @@ -505,13 +498,13 @@ for i,node in enumerate(self.nodes): op = node.op if op.is_always_pure(): - node.priority = 1 + node.setpriority(1) if op.is_guard(): - node.priority = 2 + node.setpriority(2) # the label operation defines all operations at the # beginning of the loop if op.getopnum() == rop.LABEL and i != jump_pos: - node.priority = 100 + node.setpriority(100) label_pos = i for arg in op.getarglist(): tracker.define(arg, node) @@ -535,11 +528,11 @@ elif op.is_guard(): self.guards.append(node) else: - self._build_non_pure_dependencies(node, tracker) + self.build_non_pure_dependencies(node, tracker) # pass 2 correct guard dependencies for guard_node in self.guards: op = guard_node.getoperation() - self._build_guard_dependencies(guard_node, op.getopnum(), tracker) + self.build_guard_dependencies(guard_node, op.getopnum(), tracker) # pass 3 find schedulable nodes jump_node = self.nodes[jump_pos] label_node = self.nodes[label_pos] @@ -552,38 +545,69 @@ if node.provides_count() == 0: node.edge_to(jump_node, None, label='jump') - def _build_guard_dependencies(self, guard_node, guard_opnum, tracker): + def guard_argument_protection(self, guard_node, tracker): + """ the parameters the guard protects are an indicator for + dependencies. Consider the example: + i3 = ptr_eq(p1,p2) + guard_true(i3) [...] + + guard_true|false are exceptions because they do not directly + protect the arguments, but a comparison function does. + """ + guard_op = guard_node.getoperation() + guard_opnum = guard_op.getopnum() + if guard_opnum in (rop.GUARD_TRUE, rop.GUARD_FALSE): + for dep in guard_node.depends(): + op = dep.to.getoperation() + for arg in op.getarglist(): + if isinstance(arg, Box): + self.guard_exit_dependence(guard_node, arg, tracker) + elif guard_op.is_foldable_guard(): + # these guards carry their protected variables directly as a parameter + for arg in guard_node.getoperation().getarglist(): + if isinstance(arg, Box): + self.guard_exit_dependence(guard_node, arg, tracker) + elif guard_opnum == rop.GUARD_NOT_FORCED_2: + # must be emitted before finish, thus delayed the longest + guard_node.setpriority(-10) + elif guard_opnum in (rop.GUARD_OVERFLOW, rop.GUARD_NO_OVERFLOW): + # previous operation must be an ovf_operation + guard_node.setpriority(100) + prev_node = self.nodes[guard_node.getindex()-1] + assert prev_node.getoperation().is_ovf() + prev_node.edge_to(guard_node, None, label='overflow') + elif guard_opnum == rop.GUARD_NOT_FORCED: + # previous op must be one that can raise + guard_node.setpriority(100) + prev_node = self.nodes[guard_node.getindex()-1] + assert prev_node.getoperation().can_raise() + prev_node.edge_to(guard_node, None, label='forced') + elif guard_opnum in (rop.GUARD_NO_EXCEPTION, rop.GUARD_EXCEPTION): + # previous op must be one that can raise or a not forced guard + guard_node.setpriority(100) + prev_node = self.nodes[guard_node.getindex()-1] + prev_node.edge_to(guard_node, None, label='exception') + if not prev_node.getoperation().getopnum() == rop.GUARD_NOT_FORCED: + assert prev_node.getoperation().can_raise() + else: + pass # not invalidated, early exit, future condition! + + def guard_exit_dependence(self, guard_node, var, tracker): + def_node = tracker.definition(var) + for dep in def_node.provides(): + if guard_node.is_before(dep.to) and dep.because_of(var): + guard_node.edge_to(dep.to, var, label='guard_exit('+str(var)+')') + + def build_guard_dependencies(self, guard_node, guard_opnum, tracker): if guard_opnum >= rop.GUARD_NOT_INVALIDATED: - # ignure invalidated & future condition guard + # ignore invalidated & future condition guard & early exit return - # 'GUARD_TRUE/1d', - # 'GUARD_FALSE/1d', - # 'GUARD_VALUE/2d', - # 'GUARD_CLASS/2d', - # 'GUARD_NONNULL/1d', - # 'GUARD_ISNULL/1d', - # 'GUARD_NONNULL_CLASS/2d', + # true dependencies guard_op = guard_node.op for arg in guard_op.getarglist(): tracker.depends_on_arg(arg, guard_node) - - variables = [] - for dep in guard_node.depends(): - op = dep.to.op - for arg in op.getarglist(): - if isinstance(arg, Box): - variables.append(arg) - if op.result: - variables.append(op.result) - # - for var in variables: - try: - def_node = tracker.definition(var) - for dep in def_node.provides(): - if guard_node.is_before(dep.to) and dep.because_of(var): - guard_node.edge_to(dep.to, var, label='prev('+str(var)+')') - except KeyError: - pass + # dependencies to uses of arguments it protects + self.guard_argument_protection(guard_node, tracker) # handle fail args if guard_op.getfailargs(): for arg in guard_op.getfailargs(): @@ -595,38 +619,11 @@ descr = guard_op.getdescr() if at.is_before(guard_node) and \ not isinstance(descr, compile.ResumeAtLoopHeaderDescr): - at.edge_to(guard_node, arg, label="fail") + at.edge_to(guard_node, arg, failarg=True, label="fail") except KeyError: assert False - # - # guards check overflow or raise are directly dependent - # find the first non guard operation - prev_op_idx = guard_node.opidx - 1 - while prev_op_idx > 0: - prev_node = self.nodes[prev_op_idx] - if prev_node.op.is_guard(): - prev_op_idx -= 1 - else: - break - prev_node = self.nodes[prev_op_idx] - guard_op = guard_node.getoperation() - prev_op = prev_node.getoperation() - if guard_op.is_guard_exception() and prev_op.can_raise(): - self.guard_inhert(prev_node, guard_node) - elif guard_op.is_guard_overflow() and prev_op.is_ovf(): - self.guard_inhert(prev_node, guard_node) - elif guard_op.getopnum() == rop.GUARD_NOT_FORCED and prev_op.can_raise(): - self.guard_inhert(prev_node, guard_node) - elif guard_op.getopnum() == rop.GUARD_NOT_FORCED_2 and prev_op.can_raise(): - self.guard_inhert(prev_node, guard_node) - def guard_inhert(self, at, to): - at.edge_to(to, None, label='inhert') - for dep in at.provides(): - if to.is_before(dep.to): - to.edge_to(dep.to, None, label='inhert') - - def _build_non_pure_dependencies(self, node, tracker): + def build_non_pure_dependencies(self, node, tracker): op = node.op if node.loads_from_complex_object(): # If this complex object load operation loads an index that has been diff --git a/rpython/jit/metainterp/optimizeopt/test/test_dependency.py b/rpython/jit/metainterp/optimizeopt/test/test_dependency.py --- a/rpython/jit/metainterp/optimizeopt/test/test_dependency.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_dependency.py @@ -260,7 +260,7 @@ [p0, p1, i2] # 0: 1,2?,3?,4?,5? i3 = int_add(i2,1) # 1: 2 i4 = call(p0, i3, descr=nonwritedescr) # 2: 3,4,5? - guard_no_exception() [i2] # 3: 4,5? + guard_no_exception() [i2] # 3: 4?,5? p2 = getarrayitem_gc(p1,i3,descr=intarraydescr) # 4: 5 jump(p2, p1, i3) # 5: """ @@ -271,12 +271,26 @@ [p0, p1, i2, i5] # 0: 1,2?,3?,4?,5? i3 = int_add(i2,1) # 1: 2 i4 = call(i5, i3, descr=nonwritedescr) # 2: 3,4,5? - guard_no_exception() [i2] # 3: 4,5? + guard_no_exception() [i2] # 3: 5? p2 = getarrayitem_gc(p1,i3,descr=chararraydescr) # 4: 5 jump(p2, p1, i3, i5) # 5: """ self.assert_dependencies(ops, full_check=True) + def test_not_forced(self): + ops=""" + [p0, p1, i2, i5] # 0: 1,2,4?,5,6 + i4 = call(i5, i2, descr=nonwritedescr) # 1: 2,4,6 + guard_not_forced() [i2] # 2: 3 + guard_no_exception() [] # 3: 6 + i3 = int_add(i2,1) # 4: 5 + p2 = getarrayitem_gc(p1,i3,descr=chararraydescr) # 5: 6 + jump(p2, p1, i2, i5) # 6: + """ + self.assert_dependencies(ops, full_check=True) + assert self.last_graph.nodes[2].priority == 100 + assert self.last_graph.nodes[3].priority == 100 + def test_setarrayitem_dependency(self): ops=""" [p0, i1] # 0: 1,2?,3?,4? diff --git a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py --- a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py @@ -1058,12 +1058,12 @@ guard_not_invalidated() [p38, p12, p9, p14, p39, i37, i44, f35, i40, p42, i43, f34, i28, p36, i41] guard_early_exit() [p38, p12, p9, p14, p39, i37, i44, f35, i40, p42, i43, f34, i28, p36, i41] i50 = int_add(i28, 1) + i46 = int_add(i44, 8) i48 = int_add(i41, 8) - i46 = int_add(i44, 8) i51 = int_add(i37, 8) i52 = int_ge(i50, i18) + i55 = int_add(i44, 16) i54 = int_add(i41, 16) - i55 = int_add(i44, 16) i56 = int_add(i37, 16) i53 = int_add(i28, 2) i57 = int_ge(i53, i18) diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py b/rpython/jit/metainterp/optimizeopt/vectorize.py --- a/rpython/jit/metainterp/optimizeopt/vectorize.py +++ b/rpython/jit/metainterp/optimizeopt/vectorize.py @@ -21,6 +21,7 @@ return 'NotAVectorizeableLoop()' def debug_print_operations(loop): + """ NOT_RPYTHON """ if not we_are_translated(): print('--- loop instr numbered ---') def ps(snap): @@ -43,21 +44,26 @@ inline_short_preamble, start_state, False) orig_ops = loop.operations try: + debug_start("vec-opt-loop") + metainterp_sd.logger_opt.log_loop(loop.inputargs, loop.operations, "unroll", -2, 0, "pre vectorize") metainterp_sd.profiler.count(Counters.OPT_VECTORIZE_TRY) opt = VectorizingOptimizer(metainterp_sd, jitdriver_sd, loop, optimizations) opt.propagate_all_forward() metainterp_sd.profiler.count(Counters.OPT_VECTORIZED) + + metainterp_sd.logger_opt.log_loop(loop.inputargs, loop.operations, "vec", -2, 0, "post vectorize") except NotAVectorizeableLoop: # vectorization is not possible loop.operations = orig_ops except Exception as e: loop.operations = orig_ops - debug_start("failed to vec loop") - metainterp_sd.logger_noopt.log_loop(loop.inputargs, loop.operations) - from rpython.rtyper.lltypesystem import lltype - from rpython.rtyper.lltypesystem.lloperation import llop - llop.debug_print_traceback(lltype.Void) - debug_stop("failed to vec loop") + debug_print("failed to vectorize loop. THIS IS A FATAL ERROR!") + if we_are_translated(): + from rpython.rtyper.lltypesystem import lltype + from rpython.rtyper.lltypesystem.lloperation import llop + llop.debug_print_traceback(lltype.Void) + finally: + debug_stop("vec-opt-loop") class VectorizingOptimizer(Optimizer): """ Try to unroll the loop and find instructions to group """ @@ -427,37 +433,47 @@ label_node = graph.getnode(0) ee_guard_node = graph.getnode(self.early_exit_idx) guards = graph.guards - fail_args = [] for guard_node in guards: if guard_node is ee_guard_node: continue - del_deps = [] - pullup = [] + modify_later = [] valid_trans = True last_prev_node = None for path in guard_node.iterate_paths(ee_guard_node, True): prev_node = path.second() - if fail_args_break_dependency(guard_node, prev_node, ee_guard_node): + dep = prev_node.depends_on(guard_node) + if dep.is_failarg(): + # this dependency we are able to break because it is soley + # relevant due to one or multiple fail args if prev_node == last_prev_node: + # ... + # o o + # \ / + # (a) + # | + # (g) + # this graph yields 2 paths from (g), thus (a) is + # remembered and skipped continue - del_deps.append((prev_node, guard_node)) + modify_later.append((prev_node, guard_node)) else: if path.has_no_side_effects(exclude_first=True, exclude_last=True): - #index_guards[guard.getindex()] = IndexGuard(guard, path.path[:]) path.set_schedule_priority(10) - pullup.append(path.last_but_one()) + modify_later.append((path.last_but_one(), None)) else: valid_trans = False break last_prev_node = prev_node if valid_trans: - for a,b in del_deps: - a.remove_edge_to(b) - for lbo in pullup: - if lbo is ee_guard_node: - continue - ee_guard_node.remove_edge_to(lbo) - label_node.edge_to(lbo, label='pullup') + for a,b in modify_later: + if b is not None: + a.remove_edge_to(b) + else: + last_but_one = a + if last_but_one is ee_guard_node: + continue + ee_guard_node.remove_edge_to(last_but_one) + label_node.edge_to(last_but_one, label='pullup') # only the last guard needs a connection guard_node.edge_to(ee_guard_node, label='pullup-last-guard') guard_node.relax_guard_to(ee_guard_node) @@ -516,26 +532,6 @@ return False return True -def fail_args_break_dependency(guard, prev_op, target_guard): - failargs = guard.getfailarg_set() - new_failargs = target_guard.getfailarg_set() - - op = prev_op.getoperation() - if not op.is_always_pure(): # TODO has_no_side_effect(): - return True - if op.result is not None: - arg = op.result - if arg not in failargs or \ - arg in failargs and arg in new_failargs: - return False - for arg in op.getarglist(): - if arg not in failargs or \ - arg in failargs and arg in new_failargs: - return False - # THINK about: increased index in fail arg, but normal index on arglist - # this might be an indicator for edge removal - return True - class PackType(PrimitiveTypeMixin): UNKNOWN_TYPE = '-' _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit