Author: Richard Plangger <r...@pasra.at> Branch: vecopt Changeset: r77454:1f73ac83382c Date: 2015-05-21 18:26 +0200 http://bitbucket.org/pypy/pypy/changeset/1f73ac83382c/
Log: rewritten the guard strengthening. it is now independent from vecopt (still contained in the same file though). the previous version was not correct and could only rewrite int_add. now if a guard implies a previous guard, the earlier guard is replaced directly diff --git a/rpython/jit/metainterp/optimizeopt/dependency.py b/rpython/jit/metainterp/optimizeopt/dependency.py --- a/rpython/jit/metainterp/optimizeopt/dependency.py +++ b/rpython/jit/metainterp/optimizeopt/dependency.py @@ -2,7 +2,7 @@ from rpython.jit.metainterp import compile from rpython.jit.metainterp.optimizeopt.util import make_dispatcher_method -from rpython.jit.metainterp.resoperation import (rop, GuardResOp) +from rpython.jit.metainterp.resoperation import (rop, GuardResOp, ResOperation) from rpython.jit.metainterp.resume import Snapshot from rpython.jit.codewriter.effectinfo import EffectInfo from rpython.jit.metainterp.history import BoxPtr, ConstPtr, ConstInt, BoxInt, Box, Const, BoxFloat @@ -83,6 +83,9 @@ self.emitted = False self.schedule_position = -1 self.priority = 0 + # save the operation that produces the result for the first argument + # only for guard_true/guard_false + self.guard_bool_bool_node = None def getoperation(self): return self.op @@ -531,8 +534,7 @@ self.build_non_pure_dependencies(node, tracker) # pass 2 correct guard dependencies for guard_node in self.guards: - op = guard_node.getoperation() - self.build_guard_dependencies(guard_node, op.getopnum(), tracker) + self.build_guard_dependencies(guard_node, tracker) # pass 3 find schedulable nodes jump_node = self.nodes[jump_pos] label_node = self.nodes[label_pos] @@ -559,9 +561,15 @@ if guard_opnum in (rop.GUARD_TRUE, rop.GUARD_FALSE): for dep in guard_node.depends(): op = dep.to.getoperation() - for arg in op.getarglist(): - if isinstance(arg, Box): - self.guard_exit_dependence(guard_node, arg, tracker) + if op.returns_bool_result() and op.result == guard_op.getarg(0): + guard_node.guard_bool_bool_node = dep.to + for arg in op.getarglist(): + if isinstance(arg, Box): + self.guard_exit_dependence(guard_node, arg, tracker) + break + else: + raise RuntimeError("guard_true/false has no operation that " \ + "returns the bool for the arg 0") elif guard_op.is_foldable_guard(): # these guards carry their protected variables directly as a parameter for arg in guard_node.getoperation().getarglist(): @@ -598,12 +606,12 @@ if guard_node.is_before(dep.to) and dep.because_of(var): guard_node.edge_to(dep.to, var, label='guard_exit('+str(var)+')') - def build_guard_dependencies(self, guard_node, guard_opnum, tracker): - if guard_opnum >= rop.GUARD_NOT_INVALIDATED: + def build_guard_dependencies(self, guard_node, tracker): + guard_op = guard_node.op + if guard_op.getopnum() >= rop.GUARD_NOT_INVALIDATED: # ignore invalidated & future condition guard & early exit return # true dependencies - guard_op = guard_node.op for arg in guard_op.getarglist(): tracker.depends_on_arg(arg, guard_node) # dependencies to uses of arguments it protects @@ -970,6 +978,33 @@ othercoeff = other.coefficient_mul // other.coefficient_div return mycoeff + self.constant - (othercoeff + other.constant) + def emit_operations(self, opt): + box = self.var + if self.coefficient_mul != 1: + box_result = box.clonebox() + opt.emit_operation(ResOperation(rop.INT_MUL, [box, ConstInt(self.coefficient_mul)], box_result)) + box = box_result + if self.coefficient_div != 1: + box_result = box.clonebox() + opt.emit_operation(ResOperation(rop.INT_FLOORDIV, [box, ConstInt(self.coefficient_div)], box_result)) + box = box_result + if self.constant != 0: + box_result = box.clonebox() + opt.emit_operation(ResOperation(rop.INT_ADD, [box, ConstInt(self.constant)], box_result)) + box = box_result + return box + + def compare(self, other): + assert isinstance(other, IndexVar) + v1 = (self.coefficient_mul // self.coefficient_div) + self.constant + v2 = (other.coefficient_mul // other.coefficient_div) + other.constant + if v1 == v2: + return 0 + elif v1 < v2: + return -1 + else: + return 1 + def __repr__(self): if self.is_identity(): return 'IndexVar(%s+%s)' % (self.var, repr(self.next_nonconst)) diff --git a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py --- a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py @@ -12,7 +12,7 @@ from rpython.jit.metainterp.optimizeopt.dependency import DependencyGraph from rpython.jit.metainterp.optimizeopt.unroll import Inliner from rpython.jit.metainterp.optimizeopt.vectorize import (VectorizingOptimizer, MemoryRef, - isomorphic, Pair, NotAVectorizeableLoop, NotAVectorizeableLoop) + isomorphic, Pair, NotAVectorizeableLoop, NotAVectorizeableLoop, GuardStrengthenOpt) from rpython.jit.metainterp.optimize import InvalidLoop from rpython.jit.metainterp.history import ConstInt, BoxInt, get_const_ptr_for_string from rpython.jit.metainterp import executor, compile, resume @@ -102,7 +102,8 @@ opt.extend_packset() opt.combine_packset() opt.schedule() - opt.collapse_index_guards() + gso = GuardStrengthenOpt(opt.dependency_graph.index_vars) + gso.propagate_all_forward(opt.loop) return opt def assert_unroll_loop_equals(self, loop, expected_loop, \ @@ -879,30 +880,6 @@ vopt = self.schedule(loop,1) self.assert_equal(loop, self.parse_loop(vops)) - @pytest.mark.parametrize('unroll', [1]) - def test_vectorize_index_variable_combination(self, unroll): - ops = """ - [p0,i0] - guard_early_exit() [] - i1 = raw_load(p0, i0, descr=floatarraydescr) - i2 = int_add(i0,8) - jump(p0,i2) - """ - vops = """ - [p0,i0] - guard_early_exit() [] - """ + '\n '.join(["i{x} = int_add(i0,{i})".format(i=8*(i+1),x=i+100) for i in range(unroll) ]) + \ - """ - i1 = int_add(i0, {count}) - v1 = vec_raw_load(p0, i0, {elems}, descr=floatarraydescr) - jump(p0,i1) - """.format(count=(unroll+1)*8,elems=unroll+1) - print vops - loop = self.parse_loop(ops) - vopt = self.vectorize(loop,unroll) - self.assert_equal(loop, self.parse_loop(vops)) - - def test_vschedule_trace_1(self): ops = """ [i0, i1, i2, i3, i4] @@ -948,18 +925,22 @@ jump(p0,i2) """ dead_code = '\n '.join([ - "i{t} = int_add(i0,{i})\n i{s} = int_lt(i{t}, 102)".format( - i=i+1, t=i+4, s=i+20) - for i in range(0,15)]) + "i{t1} = int_add(i{t},1)\n i{s} = int_lt(i{t1}, 102)".format( + i=i+1, t1=i+201, t=i+200, s=i+20) + for i in range(0,14)]) opt=""" [p0,i0] guard_early_exit() [p0,i0] - {dead_code} + i200 = int_add(i0, 1) + i400 = int_lt(i200, 102) i2 = int_add(i0, 16) i3 = int_lt(i2, 102) guard_true(i3) [p0,i0] + {dead_code} + i500 = same_as(i2) + i300 = int_lt(i500, 102) i1 = vec_getarrayitem_raw(p0, i0, 16, descr=chararraydescr) - jump(p0,i2) + jump(p0,i500) """.format(dead_code=dead_code) vopt = self.vectorize(self.parse_loop(ops),15) self.assert_equal(vopt.loop, self.parse_loop(opt)) @@ -1001,10 +982,12 @@ i2 = int_add(i0, 2) i3 = int_lt(i2, 10) guard_true(i3) [p0,i0] + i4 = same_as(i2) + i5 = int_lt(i4, 10) v1 = vec_getarrayitem_raw(p0, i0, 2, descr=floatarraydescr) v3 = vec_int_expand(42) v2 = vec_int_mul(v1, v3, 2) - jump(p0,i2) + jump(p0,i4) """ vopt = self.vectorize(self.parse_loop(ops),1) self.assert_equal(vopt.loop, self.parse_loop(opt)) @@ -1028,10 +1011,12 @@ i2 = int_add(i0, 2) i3 = int_lt(i2, 10) guard_true(i3) [p0,i0] + i4 = same_as(i2) + i5 = int_lt(i4, 10) v1 = vec_getarrayitem_raw(p0, i0, 2, descr=floatarraydescr) v3 = vec_float_expand(f3) v2 = vec_int_mul(v1, v3, 2) - jump(p0,i2,f3) + jump(p0,i4,f3) """ vopt = self.vectorize(self.parse_loop(ops),1) self.assert_equal(vopt.loop, self.parse_loop(opt)) @@ -1062,10 +1047,11 @@ i48 = int_add(i41, 8) i51 = int_add(i37, 8) i52 = int_ge(i50, i18) - i55 = int_add(i44, 16) - i54 = int_add(i41, 16) - i56 = int_add(i37, 16) - i53 = int_add(i28, 2) + guard_false(i52) [p38, p12, p9, p14, p39, i37, i44, f35, i40, p42, i43, f34, i28, p36, i41] + i55 = int_add(i46, 8) + i54 = int_add(i48, 8) + i56 = int_add(i51, 8) + i53 = int_add(i50, 1) i57 = int_ge(i53, i18) guard_false(i57) [p38, p12, p9, p14, p39, i37, i44, f35, i40, p42, i43, f34, i28, p36, i41] v61 = vec_raw_load(i21, i44, 2, descr=floatarraydescr) @@ -1106,7 +1092,7 @@ try: vopt = self.vectorize(self.parse_loop(ops)) self.debug_print_operations(vopt.loop) - # TODO verify + py.test.fail("this loop should not be vectorized") except NotAVectorizeableLoop: pass @@ -1117,7 +1103,7 @@ f1 = getarrayitem_raw(p0, i1, descr=floatarraydescr) i2 = cast_float_to_singlefloat(f1) setarrayitem_raw(p1, i1, i2, descr=singlefloatarraydescr) - i3 = int_add(i1, 1) + i3 = int_sub(i1, 1) i4 = int_ge(i3, 36) guard_false(i4) [] jump(p0, p1, i3) @@ -1125,15 +1111,17 @@ opt = """ [p0, p1, i1] guard_early_exit() [] - i3 = int_add(i1, 1) + i3 = int_sub(i1, 1) i4 = int_ge(i3, 36) - i5 = int_add(i1, 2) + i50 = int_add(i1, -4) + i51 = int_ge(i50, 36) + guard_false(i51) [] + i5 = int_sub(i3, 1) i8 = int_ge(i5, 36) - i6 = int_add(i1, 3) + i6 = int_sub(i5, 1) i11 = int_ge(i6, 36) - i7 = int_add(i1, 4) + i7 = same_as(i50) i14 = int_ge(i7, 36) - guard_false(i14) [] v17 = vec_getarrayitem_raw(p0, i1, 2, descr=floatarraydescr) v18 = vec_getarrayitem_raw(p0, i5, 2, descr=floatarraydescr) v19 = vec_cast_float_to_singlefloat(v17, 2) @@ -1168,16 +1156,18 @@ i5 = int_add(i4, 4) i1 = int_add(i0, 4) i186 = int_lt(i5, 100) - i189 = int_add(i0, 8) - i187 = int_add(i4, 8) - i198 = int_add(i0, 12) + i500 = int_add(i4, 16) + i501 = int_lt(i500, 100) + guard_false(i501) [] + i189 = int_add(i1, 4) + i187 = int_add(i5, 4) + i198 = int_add(i189, 4) i188 = int_lt(i187, 100) - i207 = int_add(i0, 16) - i196 = int_add(i4, 12) + i207 = int_add(i198, 4) + i196 = int_add(i187, 4) i197 = int_lt(i196, 100) - i205 = int_add(i4, 16) + i205 = same_as(i500) i206 = int_lt(i205, 100) - guard_false(i206) [] v228 = vec_raw_load(p0, i0, 4, descr=singlefloatarraydescr) v229 = vec_cast_singlefloat_to_float(v228, 2) v230 = vec_int_unpack(v228, 2, 2) diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py b/rpython/jit/metainterp/optimizeopt/vectorize.py --- a/rpython/jit/metainterp/optimizeopt/vectorize.py +++ b/rpython/jit/metainterp/optimizeopt/vectorize.py @@ -9,7 +9,7 @@ from rpython.jit.metainterp.optimizeopt.optimizer import Optimizer, Optimization from rpython.jit.metainterp.optimizeopt.util import make_dispatcher_method from rpython.jit.metainterp.optimizeopt.dependency import (DependencyGraph, - MemoryRef, Scheduler, SchedulerData, Node) + MemoryRef, Scheduler, SchedulerData, Node, IndexVar) from rpython.jit.metainterp.resoperation import (rop, ResOperation, GuardResOp) from rpython.rlib.objectmodel import we_are_translated from rpython.rlib.debug import debug_print, debug_start, debug_stop @@ -111,7 +111,8 @@ self.combine_packset() self.schedule() - self.collapse_index_guards() + gso = GuardStrengthenOpt(self.dependency_graph.index_vars) + gso.propagate_all_forward(self.loop) def emit_operation(self, op): if op.getopnum() == rop.DEBUG_MERGE_POINT: @@ -399,6 +400,8 @@ def unpack_from_vector(self, op, sched_data): args = op.getarglist() + if op.is_guard(): + py.test.set_trace() for i, arg in enumerate(op.getarglist()): if isinstance(arg, Box): self._unpack_from_vector(args, i, arg, sched_data) @@ -427,9 +430,7 @@ def analyse_index_calculations(self): if len(self.loop.operations) <= 1 or self.early_exit_idx == -1: return - self.dependency_graph = graph = DependencyGraph(self.loop) - label_node = graph.getnode(0) ee_guard_node = graph.getnode(self.early_exit_idx) guards = graph.guards @@ -437,7 +438,6 @@ if guard_node is ee_guard_node: continue modify_later = [] - valid_trans = True last_prev_node = None for path in guard_node.iterate_paths(ee_guard_node, True): prev_node = path.second() @@ -453,7 +453,7 @@ # | # (g) # this graph yields 2 paths from (g), thus (a) is - # remembered and skipped + # remembered and skipped the second time visited continue modify_later.append((prev_node, guard_node)) else: @@ -461,10 +461,13 @@ path.set_schedule_priority(10) modify_later.append((path.last_but_one(), None)) else: - valid_trans = False + # transformation is invalid. + # exit and do not enter else branch! break last_prev_node = prev_node - if valid_trans: + else: + # transformation is valid, modify the graph and execute + # this guard earlier for a,b in modify_later: if b is not None: a.remove_edge_to(b) @@ -478,53 +481,187 @@ guard_node.edge_to(ee_guard_node, label='pullup-last-guard') guard_node.relax_guard_to(ee_guard_node) - def collapse_index_guards(self): +class Guard(object): + """ An object wrapper around a guard. Helps to determine + if one guard implies another + """ + def __init__(self, op, cmp_op, lhs, rhs): + self.op = op + self.cmp_op = cmp_op + self.lhs = lhs + self.rhs = rhs + self.emitted = False + self.stronger = False + + def implies(self, guard, opt): + print self.cmp_op, "=>", guard.cmp_op, "?" + my_key = opt._get_key(self.cmp_op) + ot_key = opt._get_key(guard.cmp_op) + + if my_key[1] == ot_key[1]: + # same operation + lc = self.compare(self.lhs, guard.lhs) + rc = self.compare(self.rhs, guard.rhs) + print "compare", self.lhs, guard.lhs, lc + print "compare", self.rhs, guard.rhs, rc + opnum = my_key[1] + # x < y = -1,-2,... + # x == y = 0 + # x > y = 1,2,... + if opnum == rop.INT_LT: + return (lc > 0 and rc >= 0) or (lc == 0 and rc >= 0) + if opnum == rop.INT_LE: + return (lc >= 0 and rc >= 0) or (lc == 0 and rc >= 0) + if opnum == rop.INT_GT: + return (lc < 0 and rc >= 0) or (lc == 0 and rc > 0) + if opnum == rop.INT_GE: + return (lc <= 0 and rc >= 0) or (lc == 0 and rc >= 0) + return False + + def compare(self, key1, key2): + if isinstance(key1, Box): + assert isinstance(key2, Box) + assert key1 is key2 # key of hash enforces this + return 0 + # + if isinstance(key1, ConstInt): + assert isinstance(key2, ConstInt) + v1 = key1.value + v2 = key2.value + if v1 == v2: + return 0 + elif v1 < v2: + return -1 + else: + return 1 + # + if isinstance(key1, IndexVar): + assert isinstance(key2, IndexVar) + return key1.compare(key2) + # + raise RuntimeError("cannot compare: " + str(key1) + " <=> " + str(key2)) + + def emit_varops(self, opt, var): + if isinstance(var, IndexVar): + box = var.emit_operations(opt) + opt._same_as[var] = box + return box + else: + return var + + def emit_operations(self, opt): + lhs, opnum, rhs = opt._get_key(self.cmp_op) + # create trace instructions for the index + box_lhs = self.emit_varops(opt, self.lhs) + box_rhs = self.emit_varops(opt, self.rhs) + box_result = self.cmp_op.result.clonebox() + opt.emit_operation(ResOperation(opnum, [box_lhs, box_rhs], box_result)) + # guard + guard = self.op.clone() + guard.setarg(0, box_result) + opt.emit_operation(guard) + +class GuardStrengthenOpt(object): + def __init__(self, index_vars): + self.index_vars = index_vars + self._newoperations = [] + self._same_as = {} + + def find_compare_guard_bool(self, boolarg, operations, index): + i = index - 1 + # most likely hit in the first iteration + while i > 0: + op = operations[i] + if op.result and op.result == boolarg: + return op + i -= 1 + + raise RuntimeError("guard_true/false first arg not defined") + + def _get_key(self, cmp_op): + if cmp_op and rop.INT_LT <= cmp_op.getopnum() <= rop.INT_GE: + lhs_arg = cmp_op.getarg(0) + rhs_arg = cmp_op.getarg(1) + lhs_index_var = self.index_vars.get(lhs_arg, None) + rhs_index_var = self.index_vars.get(rhs_arg, None) + + cmp_opnum = cmp_op.getopnum() + # get the key, this identifies the guarded operation + if lhs_index_var and rhs_index_var: + key = (lhs_index_var.getvariable(), cmp_opnum, rhs_index_var.getvariable()) + elif lhs_index_var: + key = (lhs_index_var.getvariable(), cmp_opnum, rhs_arg) + elif rhs_index_var: + key = (lhs_arg, cmp_opnum, rhs_index_var) + else: + key = (lhs_arg, cmp_opnum, rhs_arg) + return key + return None + + + def get_key(self, guard_bool, operations, i): + cmp_op = self.find_compare_guard_bool(guard_bool.getarg(0), operations, i) + return self._get_key(cmp_op) + + def propagate_all_forward(self, loop): + """ strengthens the guards that protect an integral value """ strongest_guards = {} - strongest_guards_var = {} - index_vars = self.dependency_graph.index_vars - comparison_vars = self.dependency_graph.comparison_vars - operations = self.loop.operations - var_for_guard = {} - for i in range(len(operations)-1, -1, -1): + # index_vars = self.dependency_graph.index_vars + # comparison_vars = self.dependency_graph.comparison_vars + # the guards are ordered. guards[i] is before guards[j] iff i < j + operations = loop.operations + last_guard = None + for i,op in enumerate(operations): op = operations[i] - if op.is_guard(): - for arg in op.getarglist(): - var_for_guard[arg] = True - try: - comparison = comparison_vars[arg] - for index_var in list(comparison.getindex_vars()): - if not index_var: - continue - var = index_var.getvariable() - strongest_known = strongest_guards_var.get(var, None) - if not strongest_known: - strongest_guards_var[var] = index_var - continue - if index_var.less(strongest_known): - strongest_guards_var[var] = strongest_known - strongest_guards[op] = strongest_known - except KeyError: - pass - + if op.is_guard() and op.getopnum() in (rop.GUARD_TRUE, rop.GUARD_FALSE): + cmp_op = self.find_compare_guard_bool(op.getarg(0), operations, i) + key = self._get_key(cmp_op) + if key: + lhs_arg = cmp_op.getarg(0) + lhs = self.index_vars.get(lhs_arg, lhs_arg) + rhs_arg = cmp_op.getarg(1) + rhs = self.index_vars.get(rhs_arg, rhs_arg) + strongest = strongest_guards.get(key, None) + if not strongest: + strongest_guards[key] = Guard(op, cmp_op, lhs, rhs) + else: + guard = Guard(op, cmp_op, lhs, rhs) + if guard.implies(strongest, self): + guard.stronger = True + strongest_guards[key] = guard + # last_op_idx = len(operations)-1 - for op in operations: - if op.is_guard(): - stronger_guard = strongest_guards.get(op, None) - if stronger_guard: - # there is a stronger guard + for i,op in enumerate(operations): + op = operations[i] + if op.is_guard() and op.getopnum() in (rop.GUARD_TRUE, rop.GUARD_FALSE): + key = self.get_key(op, operations, i) + if key: + strongest = strongest_guards.get(key, None) + if not strongest or not strongest.stronger: + # If the key is not None and there _must_ be a strongest + # guard. If strongest is None, this operation implies the + # strongest guard that has been already been emitted. + self.emit_operation(op) + continue + elif strongest.emitted: + continue + strongest.emit_operations(self) + strongest.emitted = True continue - else: - self.emit_operation(op) + if op.result: + # emit a same_as op if a box uses the same index variable + index_var = self.index_vars.get(op.result, None) + box = self._same_as.get(index_var, None) + if box: + self.emit_operation(ResOperation(rop.SAME_AS, [box], op.result)) continue - if op.is_always_pure() and op.result: - try: - var_index = index_vars[op.result] - var_index.adapt_operation(op) - except KeyError: - pass self.emit_operation(op) - self.loop.operations = self._newoperations[:] + loop.operations = self._newoperations[:] + + def emit_operation(self, op): + self._newoperations.append(op) + def must_unpack_result_to_exec(op, target_op): # TODO either move to resop or util _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit