Author: Richard Plangger <r...@pasra.at> Branch: vecopt Changeset: r77640:f83b729acb89 Date: 2015-05-28 13:04 +0200 http://bitbucket.org/pypy/pypy/changeset/f83b729acb89/
Log: retinkering the dependency construction: statements with side effects need stronger dependencies; improved the guard strengthening optimization; removed a glitch in constructing pack operations (arguments missing and intermixed). diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py --- a/rpython/jit/backend/x86/assembler.py +++ b/rpython/jit/backend/x86/assembler.py @@ -2696,7 +2696,7 @@ # if source is a normal register (unpack) assert count == 1 assert si == 0 - self.mc.MOVAPS(X86_64_XMM_SCRATCH_REG, srcloc) + self.mc.move(X86_64_XMM_SCRATCH_REG, srcloc) src = X86_64_XMM_SCRATCH_REG.value select = ((si & 0x3) << 6)|((ri & 0x3) << 4) self.mc.INSERTPS_xxi(resloc.value, src, select) diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py --- a/rpython/jit/backend/x86/regalloc.py +++ b/rpython/jit/backend/x86/regalloc.py @@ -1579,20 +1579,20 @@ del consider_vec_logic def consider_vec_int_pack(self, op): - index = op.getarg(1) - arg = op.getarg(2) + # new_res = vec_int_pack(res, src, index, count) + arg = op.getarg(1) + index = op.getarg(2) + count = op.getarg(3) assert isinstance(index, ConstInt) + assert isinstance(count, ConstInt) args = op.getarglist() srcloc = self.make_sure_var_in_reg(arg, args) resloc = self.xrm.force_result_in_reg(op.result, op.getarg(0), args) - residx = 0 + residx = index.value # where to put it in result? 
+ srcidx = 0 assert isinstance(op.result, BoxVector) - args = op.getarglist() size = op.result.getsize() - count = 1 - if isinstance(arg, BoxVector): - count = arg.getcount() - arglocs = [resloc, srcloc, imm(index.value), imm(0), imm(count), imm(size)] + arglocs = [resloc, srcloc, imm(residx), imm(srcidx), imm(count.value), imm(size)] self.perform(op, arglocs, resloc) consider_vec_float_pack = consider_vec_int_pack diff --git a/rpython/jit/metainterp/optimizeopt/dependency.py b/rpython/jit/metainterp/optimizeopt/dependency.py --- a/rpython/jit/metainterp/optimizeopt/dependency.py +++ b/rpython/jit/metainterp/optimizeopt/dependency.py @@ -392,6 +392,10 @@ def __init__(self, graph): self.graph = graph self.defs = {} + self.non_pure = [] + + def add_non_pure(self, node): + self.non_pure.append(node) def define(self, arg, node, argcell=None): if isinstance(arg, Const): @@ -537,9 +541,13 @@ if node.exits_early(): pass else: + # consider cross iterations? if len(self.guards) > 0: last_guard = self.guards[-1] last_guard.edge_to(node, "guardorder") + for nonpure in tracker.non_pure: + nonpure.edge_to(node, failarg=True) + tracker.non_pure = [] self.guards.append(node) else: self.build_non_pure_dependencies(node, tracker) @@ -689,6 +697,8 @@ if len(self.guards) > 0: last_guard = self.guards[-1] last_guard.edge_to(node, "sideeffect") + # and the next guard instruction + tracker.add_non_pure(node) def __repr__(self): graph = "graph([\n" diff --git a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py --- a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py @@ -937,10 +937,10 @@ i3 = int_lt(i2, 102) guard_true(i3) [p0,i0] {dead_code} - i500 = same_as(i2) - i300 = int_lt(i500, 102) + i500 = int_add(i0, 16) + i501 = int_lt(i2, 102) i1 = vec_getarrayitem_raw(p0, i0, 16, descr=chararraydescr) - jump(p0,i500) + jump(p0,i2) 
""".format(dead_code=dead_code) vopt = self.vectorize(self.parse_loop(ops),15) self.assert_equal(vopt.loop, self.parse_loop(opt)) @@ -982,12 +982,12 @@ i2 = int_add(i0, 2) i3 = int_lt(i2, 10) guard_true(i3) [p0,i0] - i4 = same_as(i2) - i5 = int_lt(i4, 10) + i4 = int_add(i0, 2) + i5 = int_lt(i2, 10) v1 = vec_getarrayitem_raw(p0, i0, 2, descr=floatarraydescr) v3 = vec_int_expand(42) v2 = vec_int_mul(v1, v3) - jump(p0,i4) + jump(p0,i2) """ vopt = self.vectorize(self.parse_loop(ops),1) self.assert_equal(vopt.loop, self.parse_loop(opt)) @@ -1011,12 +1011,12 @@ i2 = int_add(i0, 2) i3 = int_lt(i2, 10) guard_true(i3) [p0,i0] - i4 = same_as(i2) - i5 = int_lt(i4, 10) + i4 = int_add(i0, 2) + i5 = int_lt(i2, 10) v1 = vec_getarrayitem_raw(p0, i0, 2, descr=floatarraydescr) v3 = vec_float_expand(f3) v2 = vec_int_mul(v1, v3) - jump(p0,i4,f3) + jump(p0,i2,f3) """ vopt = self.vectorize(self.parse_loop(ops),1) self.assert_equal(vopt.loop, self.parse_loop(opt)) @@ -1053,15 +1053,15 @@ i55 = int_add(i44, 16) i54 = int_add(i41, 16) i56 = int_add(i37, 16) - i629 = same_as(i637) - i57 = int_ge(i629, i18) + i629 = int_add(i28, 2) + i57 = int_ge(i637, i18) v61 = vec_raw_load(i21, i44, 2, descr=floatarraydescr) v62 = vec_raw_load(i4, i41, 2, descr=floatarraydescr) v63 = vec_float_add(v61, v62) vec_raw_store(i0, i37, v63, descr=floatarraydescr) f100 = vec_float_unpack(v61, 1, 1) f101 = vec_float_unpack(v62, 1, 1) - jump(p36, i629, p9, i56, p14, f100, p12, p38, f101, p39, i40, i54, p42, i43, i55, i21, i4, i0, i18) + jump(p36, i637, p9, i56, p14, f100, p12, p38, f101, p39, i40, i54, p42, i43, i55, i21, i4, i0, i18) """ vopt = self.vectorize(self.parse_loop(ops)) self.assert_equal(vopt.loop, self.parse_loop(opt)) @@ -1090,15 +1090,15 @@ i8 = int_ge(i5, 36) i6 = int_add(i1, 3) i11 = int_ge(i6, 36) - i7 = same_as(i50) - i14 = int_ge(i7, 36) + i7 = int_add(i1, 4) + i14 = int_ge(i50, 36) v17 = vec_getarrayitem_raw(p0, i1, 2, descr=floatarraydescr) v18 = vec_getarrayitem_raw(p0, i5, 2, 
descr=floatarraydescr) v19 = vec_cast_float_to_singlefloat(v17) v20 = vec_cast_float_to_singlefloat(v18) v21 = vec_float_pack(v19, v20, 2, 2) vec_setarrayitem_raw(p1, i1, v21, descr=singlefloatarraydescr) - jump(p0, p1, i7) + jump(p0, p1, i50) """ vopt = self.vectorize(self.parse_loop(ops)) self.assert_equal(vopt.loop, self.parse_loop(opt)) @@ -1136,8 +1136,8 @@ i207 = int_add(i0, 16) i196 = int_add(i4, 12) i197 = int_lt(i196, 100) - i205 = same_as(i500) - i206 = int_lt(i205, 100) + i205 = int_add(i4, 16) + i206 = int_lt(i500, 100) v228 = vec_raw_load(p0, i0, 4, descr=singlefloatarraydescr) v229 = vec_cast_singlefloat_to_float(v228) v230 = vec_int_unpack(v228, 2, 2) @@ -1152,7 +1152,7 @@ v239 = vec_cast_float_to_singlefloat(v237) v240 = vec_float_pack(v238, v239, 2, 2) vec_raw_store(p2, i4, v240, descr=singlefloatarraydescr) - jump(p0, p1, p2, i207, i205) + jump(p0, p1, p2, i207, i500) """ vopt = self.vectorize(self.parse_loop(ops)) self.assert_equal(vopt.loop, self.parse_loop(opt)) @@ -1237,6 +1237,47 @@ opt = self.vectorize(self.parse_loop(trace)) self.debug_print_operations(opt.loop) + def test_cast_1(self): + trace = """ + [i9, i10, p2, p11, i12, i13, p4, p5, p14, i15, p8, i16, p17, i18, i19, i20, i21, i22, i23] + guard_early_exit() [p8, p5, p4, p2, p17, i13, i12, i10, i19, p14, p11, i18, i15, i16, i9] + i24 = raw_load(i20, i16, descr=singlefloatarraydescr) + guard_not_invalidated() [p8, p5, p4, p2, i24, p17, i13, i12, i10, i19, p14, p11, i18, i15, i16, None] + i27 = int_add(i16, 4) + i28 = raw_load(i21, i19, descr=singlefloatarraydescr) + i30 = int_add(i19, 4) + f31 = cast_singlefloat_to_float(i24) + f32 = cast_singlefloat_to_float(i28) + f33 = float_add(f31, f32) + i34 = cast_float_to_singlefloat(f33) + raw_store(i22, i13, i34, descr=singlefloatarraydescr) + i36 = int_add(i12, 1) + i38 = int_add(i13, 4) + i39 = int_ge(i36, i23) + guard_false(i39) [p8, p5, p4, p2, i27, i28, i30, i24, i38, i36, p17, None, None, None, None, p14, p11, i18, i15, None, None] + 
jump(i24, i28, p2, p11, i36, i38, p4, p5, p14, i15, p8, i27, p17, i18, i30, i20, i21, i22, i23) + """ + opt = self.vectorize(self.parse_loop(trace)) + self.debug_print_operations(opt.loop) + + def test_all_guard(self): + trace = """ + [p0, p3, i4, i5, i6, i7] + guard_early_exit() [p0, p3, i5, i4] + f8 = raw_load(i6, i5, descr=floatarraydescr) + guard_not_invalidated() [p0, f8, p3, i5, i4] + i9 = cast_float_to_int(f8) + i11 = int_and(i9, 255) + guard_false(i11) [p0, p3, i5, i4] + i13 = int_add(i4, 1) + i15 = int_add(i5, 8) + i16 = int_ge(i13, i7) + guard_false(i16) [p0, i13, i15, p3, None, None] + jump(p0, p3, i13, i15, i6, i7) + """ + opt = self.vectorize(self.parse_loop(trace)) + self.debug_print_operations(opt.loop) + def test_reduction_basic(self): trace = """ [p5, i6, p2, i7, p1, p8, i9, i10, f11, i12, i13, i14] diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py b/rpython/jit/metainterp/optimizeopt/vectorize.py --- a/rpython/jit/metainterp/optimizeopt/vectorize.py +++ b/rpython/jit/metainterp/optimizeopt/vectorize.py @@ -507,14 +507,15 @@ """ An object wrapper around a guard. Helps to determine if one guard implies another """ - def __init__(self, op, cmp_op, lhs, lhs_arg, rhs, rhs_arg): + def __init__(self, index, op, cmp_op, lhs, lhs_arg, rhs, rhs_arg): + self.index = index self.op = op self.cmp_op = cmp_op self.lhs = lhs self.rhs = rhs self.lhs_arg = lhs_arg self.rhs_arg = rhs_arg - self.emitted = False + self.implied = False self.stronger = False def implies(self, guard, opt): @@ -638,7 +639,7 @@ def propagate_all_forward(self, loop): """ strengthens the guards that protect an integral value """ strongest_guards = {} - implied_guards = {} + guards = {} # the guards are ordered. 
guards[i] is before guards[j] iff i < j operations = loop.operations last_guard = None @@ -652,44 +653,43 @@ lhs = self.index_vars.get(lhs_arg, lhs_arg) rhs_arg = cmp_op.getarg(1) rhs = self.index_vars.get(rhs_arg, rhs_arg) - strongest = strongest_guards.get(key, None) - if not strongest: - strongest_guards[key] = Guard(op, cmp_op, - lhs, lhs_arg, - rhs, rhs_arg) + other = strongest_guards.get(key, None) + if not other: + guard = Guard(i, op, cmp_op, + lhs, lhs_arg, + rhs, rhs_arg) + strongest_guards[key] = guard + # nothing known, at this position emit the guard + guards[i] = guard else: # implicit index(strongest) < index(current) - guard = Guard(op, cmp_op, + guard = Guard(i, op, cmp_op, lhs, lhs_arg, rhs, rhs_arg) - if guard.implies(strongest, self): + if guard.implies(other, self): + strongest_guards[key] = guard guard.stronger = True - strongest_guards[key] = guard - elif strongest.implies(guard, self): - implied_guards[op] = True + guard.index = other.index + guards[other.index] = guard + # do not mark as emit + continue + elif other.implies(guard, self): + guard.implied = True + # mark as emit + guards[i] = guard + strongest_guards = None # self.renamer = Renamer() last_op_idx = len(operations)-1 for i,op in enumerate(operations): op = operations[i] if op.is_guard() and op.getopnum() in (rop.GUARD_TRUE, rop.GUARD_FALSE): - if implied_guards.get(op, False): - # this guard is implied, thus removed + guard = guards.get(i, None) + if not guard or guard.implied: + # this guard is implied or marked as not emitted (= None) continue - key = self.get_key(op, operations, i) - if key[0] is not None: - strongest = strongest_guards.get(key, None) - if not strongest or not strongest.stronger: - # If the key is not None and there _must_ be a strongest - # guard. If strongest is None, this operation implies the - # strongest guard that has been already been emitted. 
- self.emit_operation(op) - continue - elif strongest.emitted: - continue - strongest.emit_operations(self) - strongest.emitted = True + if guard.stronger: + guard.emit_operations(self) continue if op.result: - # emit a same_as op if a box uses the same index variable index_var = self.index_vars.get(op.result, None) if index_var: if not index_var.is_identity(): @@ -981,7 +981,7 @@ arg = op.getoperation().getarg(argidx) new_box = vbox.clonebox() resop = ResOperation(opnum, - [vbox,ConstInt(i),arg], new_box) + [vbox,arg,ConstInt(i),ConstInt(0)], new_box) vbox = new_box self.preamble_ops.append(resop) return vbox _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit