Author: Richard Plangger <r...@pasra.at> Branch: vecopt-merge Changeset: r79080:963383c4adcd Date: 2015-08-20 09:34 +0200 http://bitbucket.org/pypy/pypy/changeset/963383c4adcd/
Log: wrong target token has been added to the label jump in case a second label is introduced; test for it and some documentation diff --git a/rpython/jit/metainterp/compile.py b/rpython/jit/metainterp/compile.py --- a/rpython/jit/metainterp/compile.py +++ b/rpython/jit/metainterp/compile.py @@ -161,10 +161,9 @@ assert part.operations[-1].getopnum() != rop.LABEL if loop.versions is not None: - # several different loop version have been generated + # every different loop version must update their target tokens for version in loop.versions: - token = version.update_token(jitcell_token) - all_target_tokens.append(token) + version.update_token(jitcell_token, all_target_tokens) if not loop.quasi_immutable_deps: loop.quasi_immutable_deps = None @@ -186,6 +185,9 @@ return all_target_tokens[0] def generate_pending_loop_versions(loop, jitdriver_sd, metainterp, jitcell_token): + """ if a loop version is created for a guard instruction (e.g. they are known + to fail frequently, then a version can be created that is immediatly compiled. + """ metainterp_sd = metainterp.staticdata cpu = metainterp_sd.cpu if loop.versions is not None: diff --git a/rpython/jit/metainterp/history.py b/rpython/jit/metainterp/history.py --- a/rpython/jit/metainterp/history.py +++ b/rpython/jit/metainterp/history.py @@ -801,20 +801,33 @@ op.setfailargs(self.inputargs) op.rd_snapshot = None - def update_token(self, jitcell_token): + def update_token(self, jitcell_token, all_target_tokens): # this is only invoked for versioned loops! 
- label = self.operations[self.label_pos] + label_index = index_of_first(rop.LABEL, self.operations, 0) + label = self.operations[label_index] jump = self.operations[-1] # - assert label.getopnum() == rop.LABEL assert jump.getopnum() == rop.JUMP # token = TargetToken(jitcell_token) token.original_jitcell_token = jitcell_token + all_target_tokens.append(token) + if label.getdescr() is not jump.getdescr(): + label_index = index_of_first(rop.LABEL, self.operations, 1) + if label_index > 0: + second_label = self.operations[label_index] + # set the inner loop + second_label.setdescr(token) + jump.setdescr(token) + # set the first label + token = TargetToken(jitcell_token) + token.original_jitcell_token = jitcell_token + all_target_tokens.append(token) + label.setdescr(token) + return label.setdescr(token) jump.setdescr(token) - return token class TreeLoop(object): inputargs = None diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py b/rpython/jit/metainterp/optimizeopt/schedule.py --- a/rpython/jit/metainterp/optimizeopt/schedule.py +++ b/rpython/jit/metainterp/optimizeopt/schedule.py @@ -876,8 +876,8 @@ descr = label.getdescr() assert isinstance(descr, TargetToken) token = TargetToken(descr.targeting_jitcell_token) - oplist[0] = label.copy_and_change(label.getopnum(), label_args, None, token) - oplist[-1] = jump.copy_and_change(jump.getopnum(), jump_args, None, token) + oplist[0] = label.copy_and_change(label.getopnum(), args=label_args, descr=token) + oplist[-1] = jump.copy_and_change(jump.getopnum(), args=jump_args, descr=token) # return [ResOperation(rop.LABEL, orig_label_args, None, descr)] + \ self.invariant_oplist + oplist diff --git a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py --- a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py @@ -1389,113 +1389,30 @@ def test_1(self): trace = """ - [p0, p1, p5, p6, p7, 
p9, p11, p12] - debug_merge_point(0, 0, '<code object <module>. file '/home/rich/fijal.py'. line 2> #34 FOR_ITER') - guard_early_exit(descr=<ResumeAtLoopHeaderDescr object at 0x7ffff7e3cc80>) [p1, p0, p5, p6, p7, p9] - p13 = getfield_gc(p9, descr=<FieldP pypy.objspace.std.iterobject.W_AbstractSeqIterObject.inst_w_seq 16>) - guard_nonnull(p13, descr=<ResumeGuardNonnullDescr object at 0x7ffff7e3cce0>) [p1, p0, p9, p13, p5, p6, p7] - i14 = getfield_gc(p9, descr=<FieldS pypy.objspace.std.iterobject.W_AbstractSeqIterObject.inst_index 8>) - p15 = getfield_gc(p13, descr=<FieldP pypy.objspace.std.listobject.W_ListObject.inst_strategy 16>) - guard_class(p15, 140737326900656, descr=<ResumeGuardClassDescr object at 0x7ffff7e3cd40>) [p1, p0, p9, i14, p15, p13, p5, p6, p7] - p17 = getfield_gc(p13, descr=<FieldP pypy.objspace.std.listobject.W_ListObject.inst_lstorage 8>) - i18 = getfield_gc_pure(p17, descr=<FieldS tuple1.item0 8>) - i20 = int_lt(i14, 0) - guard_false(i20, descr=<ResumeGuardFalseDescr object at 0x7ffff7e3cda0>) [p1, p0, p9, i14, i18, p5, p6, p7] - i21 = int_ge(i14, i18) - guard_false(i21, descr=<ResumeGuardFalseDescr object at 0x7ffff7e3ce00>) [p1, p0, p9, i14, p5, p6, p7] - i23 = int_add(i14, 1) - debug_merge_point(0, 0, '<code object <module>. file '/home/rich/fijal.py'. 
line 2> #37 STORE_NAME') - p24 = getfield_gc(p5, descr=<FieldP pypy.interpreter.pyframe.FrameDebugData.inst_w_locals 48>) - setfield_gc(p9, i23, descr=<FieldS pypy.objspace.std.iterobject.W_AbstractSeqIterObject.inst_index 8>) - guard_value(p24, 25, descr=<ResumeGuardValueDescr object at 0x7ffff7e3ce60>) [p1, p0, p24, p5, p6, p7, p9, i14] - p26 = getfield_gc(p24, descr=<FieldP pypy.objspace.std.dictmultiobject.W_DictMultiObject.inst_strategy 16>) - guard_value(p26, 27, descr=<ResumeGuardValueDescr object at 0x7ffff7e3cec0>) [p1, p0, p26, p24, p5, p6, p7, p9, i14] - guard_not_invalidated(descr=<ResumeGuardNotInvalidated object at 0x7ffff7e3cf20>) [p1, p0, p24, p5, p6, p7, p9, i14] - debug_merge_point(0, 0, '<code object <module>. file '/home/rich/fijal.py'. line 2> #40 LOAD_NAME') - debug_merge_point(0, 0, '<code object <module>. file '/home/rich/fijal.py'. line 2> #43 CALL_FUNCTION') - p28 = force_token() - enter_portal_frame(15, 8070450532247933488) - debug_merge_point(1, 1, '<code object f. file '/home/rich/fijal.py'. line 4> #0 LOAD_CONST') - debug_merge_point(1, 1, '<code object f. file '/home/rich/fijal.py'. line 4> #3 STORE_FAST') - debug_merge_point(1, 1, '<code object f. file '/home/rich/fijal.py'. line 4> #6 SETUP_LOOP') - debug_merge_point(1, 1, '<code object f. file '/home/rich/fijal.py'. line 4> #9 LOAD_FAST') - debug_merge_point(1, 1, '<code object f. file '/home/rich/fijal.py'. line 4> #12 LOAD_CONST') - debug_merge_point(1, 1, '<code object f. file '/home/rich/fijal.py'. line 4> #15 COMPARE_OP') - debug_merge_point(1, 1, '<code object f. file '/home/rich/fijal.py'. line 4> #18 POP_JUMP_IF_FALSE') - debug_merge_point(1, 1, '<code object f. file '/home/rich/fijal.py'. line 4> #21 LOAD_GLOBAL') - debug_merge_point(1, 1, '<code object f. file '/home/rich/fijal.py'. line 4> #24 LOAD_FAST') - debug_merge_point(1, 1, '<code object f. file '/home/rich/fijal.py'. 
line 4> #27 BINARY_SUBSCR') - p32 = getfield_gc(31, descr=<FieldP pypy.objspace.std.listobject.W_ListObject.inst_strategy 16>) - setfield_gc(33, i14, descr=<FieldS pypy.objspace.std.typeobject.IntMutableCell.inst_intvalue 8>) - guard_class(p32, 140737326657856, descr=<ResumeGuardClassDescr object at 0x7ffff7e3cf80>) [p1, p0, p11, p32, p5, p6, p7, p9, p28, p12, None] - p36 = getfield_gc(31, descr=<FieldP pypy.objspace.std.listobject.W_ListObject.inst_lstorage 8>) - i37 = getfield_gc(p36, descr=<FieldS list.length 8>) - i39 = uint_ge(0, i37) - guard_false(i39, descr=<ResumeGuardFalseDescr object at 0x7ffff7e3cfe0>) [p1, p0, p11, i37, p36, p5, p6, p7, p9, p28, p12, None] - p40 = getfield_gc(p36, descr=<FieldP list.items 16>) - i41 = getarrayitem_gc(p40, 0, descr=intarraydescr) - debug_merge_point(1, 1, '<code object f. file '/home/rich/fijal.py'. line 4> #28 LOAD_CONST') - debug_merge_point(1, 1, '<code object f. file '/home/rich/fijal.py'. line 4> #31 BINARY_ADD') - i43 = int_add_ovf(i41, 1) - guard_no_overflow(descr=<ResumeGuardNoOverflowDescr object at 0x7ffff7e3d040>) [p1, p0, p11, i43, p5, p6, p7, p9, i41, p28, p12, None] - debug_merge_point(1, 1, '<code object f. file '/home/rich/fijal.py'. line 4> #32 LOAD_GLOBAL') - debug_merge_point(1, 1, '<code object f. file '/home/rich/fijal.py'. line 4> #35 LOAD_FAST') - debug_merge_point(1, 1, '<code object f. file '/home/rich/fijal.py'. line 4> #38 STORE_SUBSCR') - debug_merge_point(1, 1, '<code object f. file '/home/rich/fijal.py'. line 4> #39 LOAD_FAST') - debug_merge_point(1, 1, '<code object f. file '/home/rich/fijal.py'. line 4> #42 LOAD_CONST') - debug_merge_point(1, 1, '<code object f. file '/home/rich/fijal.py'. line 4> #45 INPLACE_ADD') - debug_merge_point(1, 1, '<code object f. file '/home/rich/fijal.py'. line 4> #46 STORE_FAST') - debug_merge_point(1, 1, '<code object f. file '/home/rich/fijal.py'. 
line 4> #49 JUMP_ABSOLUTE') - i45 = getfield_raw(140737351875840, descr=<FieldS pypysig_long_struct.c_value 0>) - setarrayitem_gc(p40, 0, i43, descr=intarraydescr) - i48 = int_lt(i45, 0) - guard_false(i48, descr=<ResumeGuardFalseDescr object at 0x7ffff7e3d0a0>) [p1, p0, p11, p5, p6, p7, p9, None, p28, p12, None] - debug_merge_point(1, 1, '<code object f. file '/home/rich/fijal.py'. line 4> #9 LOAD_FAST') - p49 = force_token() - p51 = new_with_vtable(140737326477112) - p53 = new_array_clear(4, descr=<ArrayP 8>) - p55 = new_with_vtable(100000) - p57 = new_with_vtable(140737326516800) - setfield_gc(p57, p28, descr=<FieldP JitVirtualRef.virtual_token 8>) - setfield_gc(p57, 0, descr=<FieldP JitVirtualRef.forced 16>) - setfield_gc(p11, p57, descr=<FieldP pypy.interpreter.executioncontext.ExecutionContext.inst_topframeref 72>) - setfield_gc(p0, p49, descr=<FieldP pypy.interpreter.pyframe.PyFrame.vable_token 8>) - setfield_gc(p51, 0, descr=<FieldU pypy.interpreter.pyframe.PyFrame.inst_escaped 88>) - setfield_gc(p51, 0, descr=<FieldU pypy.interpreter.pyframe.PyFrame.inst_frame_finished_execution 89>) - setfield_gc(p51, 9, descr=<FieldS pypy.interpreter.pyframe.PyFrame.inst_last_instr 40>) - setfield_gc(p51, 1, descr=<FieldS pypy.interpreter.pyframe.PyFrame.inst_valuestackdepth 72>) - setfield_gc(p51, 25, descr=<FieldP pypy.interpreter.pyframe.PyFrame.inst_w_globals 80>) - setfield_gc(p51, 63, descr=<FieldP pypy.interpreter.pyframe.PyFrame.inst_pycode 64>) - setfield_gc(p55, 1, descr=<FieldS pypy.objspace.std.intobject.W_IntObject.inst_intval 8>) - setarrayitem_gc(p53, 0, p55, descr=<ArrayP 8>) - setfield_gc(p51, p53, descr=<FieldP pypy.interpreter.pyframe.PyFrame.inst_locals_cells_stack_w 56>) - setfield_gc(p51, p12, descr=<FieldP pypy.interpreter.pyframe.PyFrame.inst_f_backref 24>) - setfield_gc(p51, 66, descr=<FieldP pypy.interpreter.pyframe.PyFrame.inst_lastblock 48>) - p67 = call_assembler(p51, p11, descr=<Loop2>) - guard_not_forced(descr=<ResumeGuardForcedDescr object 
at 0x7ffff7e4b4d8>) [p1, p0, p11, p51, p67, p57, p5, p6, p7, p9] - keepalive(p51) - guard_no_exception(descr=<ResumeGuardNoExceptionDescr object at 0x7ffff7e3d100>) [p1, p0, p11, p51, p67, p57, p5, p6, p7, p9] - leave_portal_frame(15) - p69 = getfield_gc(p11, descr=<FieldP pypy.interpreter.executioncontext.ExecutionContext.inst_topframeref 72>) - p70 = getfield_gc(p51, descr=<FieldP pypy.interpreter.pyframe.PyFrame.inst_f_backref 24>) - i71 = getfield_gc(p51, descr=<FieldU pypy.interpreter.pyframe.PyFrame.inst_escaped 88>) - setfield_gc(p11, p70, descr=<FieldP pypy.interpreter.executioncontext.ExecutionContext.inst_topframeref 72>) - guard_false(i71, descr=<ResumeGuardFalseDescr object at 0x7ffff7e3d160>) [p1, p0, p67, p69, p51, p57, p5, p6, p7, p9] - i72 = getfield_gc(p5, descr=<FieldU pypy.interpreter.pyframe.FrameDebugData.inst_is_being_profiled 56>) - setfield_gc(p57, 0, descr=<FieldP JitVirtualRef.virtual_token 8>) - guard_value(i72, 0, descr=<ResumeGuardValueDescr object at 0x7ffff7e3d1c0>) [i72, p1, p0, p5, p6, p7, p9, p67] - debug_merge_point(0, 0, '<code object <module>. file '/home/rich/fijal.py'. line 2> #46 POP_TOP') - p75 = getfield_gc(p5, descr=<FieldP pypy.interpreter.pyframe.FrameDebugData.inst_w_f_trace 40>) - guard_isnull(p75, descr=<ResumeGuardIsnullDescr object at 0x7ffff7e3d220>) [p1, p0, p75, p5, p6, p7, p9, p67] - debug_merge_point(0, 0, '<code object <module>. file '/home/rich/fijal.py'. line 2> #47 JUMP_ABSOLUTE') - guard_not_invalidated(descr=<ResumeGuardNotInvalidated object at 0x7ffff7e3d280>) [p1, p0, p5, p6, p7, p9] - i77 = getfield_raw(140737351875840, descr=<FieldS pypysig_long_struct.c_value 0>) - i79 = int_lt(i77, 0) - guard_false(i79, descr=<ResumeGuardFalseDescr object at 0x7ffff7e3d2e0>) [p1, p0, p5, p6, p7, p9] - debug_merge_point(0, 0, '<code object <module>. file '/home/rich/fijal.py'. 
line 2> #34 FOR_ITER') - jump(p0, p1, p5, p6, p7, p9, p11, p70) + [p0, p1, p6, p7, i13, p14, p15] + guard_early_exit(descr=<ResumeAtLoopHeaderDescr object at 0x7f89c54cdbe0>) [p1, p0, p6, p7, i13] + guard_not_invalidated(descr=<ResumeGuardNotInvalidated object at 0x7f89c54cdc40>) [p1, p0, p6, p7, i13] + i17 = int_lt(i13, 10000) + guard_true(i17, descr=<ResumeGuardTrueDescr object at 0x7f89c54cdca0>) [p1, p0, p6, p7, i13] + i18 = getfield_gc(p14, descr=<FieldS list.length 8>) + i19 = uint_ge(i13, i18) + guard_false(i19, descr=<ResumeGuardFalseDescr object at 0x7f89c54cdd00>) [p1, p0, i18, i13, p14, p6, p7, None] + p21 = getfield_gc(p14, descr=<FieldP list.items 16>) + f22 = getarrayitem_gc(p21, i13, descr=<ArrayF 8>) + i23 = getfield_gc(p15, descr=<FieldS list.length 8>) + i24 = uint_ge(i13, i23) + guard_false(i24, descr=<ResumeGuardFalseDescr object at 0x7f89c54cdd60>) [p1, p0, i23, i13, p15, p6, p7, f22, None] + p25 = getfield_gc(p15, descr=<FieldP list.items 16>) + f26 = getarrayitem_gc(p25, i13, descr=floatarraydescr) + f27 = float_add(f22, f26) + setarrayitem_gc(p21, i13, f27, descr=floatarraydescr) + i29 = int_add(i13, 1) + #i31 = getfield_raw(140229696280448, descr=<FieldS pypysig_long_struct.c_value 0>) + i33 = int_lt(0, 1) + guard_false(i33, descr=<ResumeGuardFalseDescr object at 0x7f89c54cddc0>) [p1, p0, p6, p7, i29, None, None] + jump(p0, p1, p6, p7, i29, p14, p15) """ - #opt = self.schedule(self.parse_loop(trace)) + opt = self.schedule(self.parse_loop(trace)) #self.debug_print_operations(opt.loop) class TestLLtype(BaseTestVectorize, LLtypeMixin): diff --git a/rpython/jit/metainterp/test/test_vectorize.py b/rpython/jit/metainterp/test/test_vectorize.py --- a/rpython/jit/metainterp/test/test_vectorize.py +++ b/rpython/jit/metainterp/test/test_vectorize.py @@ -19,12 +19,12 @@ def setup_method(self, method): print "RUNNING", method.__name__ - def meta_interp(self, f, args, policy=None): + def meta_interp(self, f, args, policy=None, vec=True, vec_all=False): 
return ll_meta_interp(f, args, enable_opts=self.enable_opts, policy=policy, CPUClass=self.CPUClass, type_system=self.type_system, - vec=True) + vec=vec, vec_all=vec_all) @py.test.mark.parametrize('i',[3,4,5,6,7,8,9,50]) def test_vectorize_simple_load_arith_store_int_add_index(self,i): @@ -154,7 +154,6 @@ def test_sum(self): myjitdriver = JitDriver(greens = [], reds = 'auto', vectorize=True) - myjitdriver2 = JitDriver(greens = [], reds = 'auto', vectorize=True) T = lltype.Array(rffi.DOUBLE, hints={'nolength': True}) def f(d): va = lltype.malloc(T, d, flavor='raw', zero=True) @@ -171,6 +170,56 @@ res = self.meta_interp(f, [60]) assert res == f(60) == sum(range(60)) + def test_constant_expand(self): + myjitdriver = JitDriver(greens = [], reds = 'auto', vectorize=True) + T = lltype.Array(rffi.DOUBLE, hints={'nolength': True}) + def f(d): + va = lltype.malloc(T, d, flavor='raw', zero=True) + i = 0 + while i < d: + myjitdriver.jit_merge_point() + va[i] = va[i] + 34.5 + i += 1 + val = va[0] + lltype.free(va, flavor='raw') + return val + res = self.meta_interp(f, [60]) + assert res == f(60) == 34.5 + + def test_constant_expand_vec_all(self): + myjitdriver = JitDriver(greens = [], reds = 'auto') + T = lltype.Array(rffi.DOUBLE, hints={'nolength': True}) + def f(d): + va = lltype.malloc(T, d, flavor='raw', zero=True) + i = 0 + while i < d: + myjitdriver.jit_merge_point() + if not (i < 60): + raise IndexError + va[i] = va[i] + 34.5 + i += 1 + val = va[0] + lltype.free(va, flavor='raw') + return val + res = self.meta_interp(f, [60], vec_all=True) + assert res == f(60) == 34.5 + + def test_variable_expand(self): + myjitdriver = JitDriver(greens = [], reds = 'auto', vectorize=True) + T = lltype.Array(rffi.DOUBLE, hints={'nolength': True}) + def f(d,variable): + va = lltype.malloc(T, d, flavor='raw', zero=True) + i = 0 + while i < d: + myjitdriver.jit_merge_point() + va[i] = va[i] + variable + i += 1 + val = va[0] + lltype.free(va, flavor='raw') + return val + res = 
self.meta_interp(f, [60,58.4547]) + assert res == f(60,58.4547) == 58.4547 + @py.test.mark.parametrize('i',[15]) def test_array_bounds_check_elimination(self,i): myjitdriver = JitDriver(greens = [], diff --git a/rpython/jit/metainterp/warmspot.py b/rpython/jit/metainterp/warmspot.py --- a/rpython/jit/metainterp/warmspot.py +++ b/rpython/jit/metainterp/warmspot.py @@ -72,7 +72,7 @@ loop_longevity=0, retrace_limit=5, function_threshold=4, enable_opts=ALL_OPTS_NAMES, max_retrace_guards=15, max_unroll_recursion=7, vec=0, vec_all=0, vec_cost=0, - vec_length=50, vec_ratio=6, **kwds): + vec_length=60, vec_ratio=2, **kwds): from rpython.config.config import ConfigError translator = interp.typer.annotator.translator try: diff --git a/rpython/rlib/jit.py b/rpython/rlib/jit.py --- a/rpython/rlib/jit.py +++ b/rpython/rlib/jit.py @@ -554,10 +554,10 @@ 'optimizations to enable, or all = %s' % ENABLE_ALL_OPTS, 'max_unroll_recursion': 'how many levels deep to unroll a recursive function', 'vec': 'turn on the vectorization optimization (vecopt). requires sse4.1', - 'vec_all': 'all = 1: try to vectorize trace loops that occur outside of the numpy library.', - 'vec_cost': 'cost = 0: threshold for which traces to bail. 0 means the costs.', - 'vec_length': 'length = 50: the amount of instructions allowed in "all" traces.', - 'vec_ratio': 'ratio = 6: an integer (0-10 => X / 10) statements that have vector equivalents ' + 'vec_all': 'try to vectorize trace loops that occur outside of the numpy library.', + 'vec_cost': 'threshold for which traces to bail. 
0 means the costs.', + 'vec_length': 'the amount of instructions allowed in "all" traces.', + 'vec_ratio': 'an integer (0-10 transfored into a float by X / 10.0) statements that have vector equivalents ' 'divided by the total number of trace instructions.', } @@ -577,8 +577,8 @@ 'vec': 0, 'vec_all': 0, 'vec_cost': 0, - 'vec_length': 50, - 'vec_ratio': 6, + 'vec_length': 60, + 'vec_ratio': 2, } unroll_parameters = unrolling_iterable(PARAMETERS.items()) _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit