Author: Richard Plangger <r...@pasra.at> Branch: vecopt-merge Changeset: r79029:fc65e2cfbacc Date: 2015-08-18 10:41 +0200 http://bitbucket.org/pypy/pypy/changeset/fc65e2cfbacc/
Log: Added a new jit parameter vec_params which encodes the previously separate vectorize_user and vec_cost parameters and adds a maximum trace length and a ratio, both used in the fast path to skip vector traces. Renamed vectorize to vec and vectorize_user to vec_all. diff --git a/pypy/module/micronumpy/test/test_zjit.py b/pypy/module/micronumpy/test/test_zjit.py --- a/pypy/module/micronumpy/test/test_zjit.py +++ b/pypy/module/micronumpy/test/test_zjit.py @@ -97,7 +97,7 @@ backendopt=True, graph_and_interp_only=True, ProfilerClass=Profiler, - vectorize=True) + vec=True) self.__class__.interp = interp self.__class__.graph = graph diff --git a/rpython/jit/backend/x86/test/test_zrpy_vecopt.py b/rpython/jit/backend/x86/test/test_zrpy_vecopt.py --- a/rpython/jit/backend/x86/test/test_zrpy_vecopt.py +++ b/rpython/jit/backend/x86/test/test_zrpy_vecopt.py @@ -17,7 +17,7 @@ t.buildrtyper().specialize() if kwds['jit']: - apply_jit(t, vectorize=True) + apply_jit(t, vec=True) class TestVecOptX86(object): def test_translate(self): diff --git a/rpython/jit/metainterp/optimizeopt/__init__.py b/rpython/jit/metainterp/optimizeopt/__init__.py --- a/rpython/jit/metainterp/optimizeopt/__init__.py +++ b/rpython/jit/metainterp/optimizeopt/__init__.py @@ -69,8 +69,8 @@ optimizations, unroll = build_opt_chain(metainterp_sd, enable_opts) if unroll: if not export_state and \ - ((warmstate.vectorize and jitdriver_sd.vectorize) \ - or warmstate.vectorize_user): + ((warmstate.vec and jitdriver_sd.vec) \ + or warmstate.vec_all): optimize_vector(metainterp_sd, jitdriver_sd, loop, optimizations, inline_short_preamble, start_state, warmstate) diff --git a/rpython/jit/metainterp/optimizeopt/test/test_util.py b/rpython/jit/metainterp/optimizeopt/test/test_util.py --- a/rpython/jit/metainterp/optimizeopt/test/test_util.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_util.py @@ -333,14 +333,14 @@ storedebug = None class FakeWarmState(object): - vectorize = True # default is on - vectorize_user = False + vec = True # default is 
on + vec_all = False vec_cost = 0 def __init__(self, enable_opts): self.enable_opts = enable_opts class FakeJitDriverStaticData(object): - vectorize = False + vec = False class FakeMetaInterpStaticData(object): diff --git a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py --- a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py @@ -22,7 +22,7 @@ from rpython.rlib.rarithmetic import LONG_BIT class FakeJitDriverStaticData(object): - vectorize=True + vec=True class FakeCostModel(CostModel): def __init__(self): diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py b/rpython/jit/metainterp/optimizeopt/vectorize.py --- a/rpython/jit/metainterp/optimizeopt/vectorize.py +++ b/rpython/jit/metainterp/optimizeopt/vectorize.py @@ -35,6 +35,9 @@ inline_short_preamble, start_state, warmstate): optimize_unroll(metainterp_sd, jitdriver_sd, loop, optimizations, inline_short_preamble, start_state, False) + user_code = not jitdriver_sd.vec and warmstate.vec_all + if user_code and user_loop_bail_fast_path(loop, warmstate): + return version = loop.snapshot() try: debug_start("vec-opt-loop") @@ -47,7 +50,6 @@ opt = VectorizingOptimizer(metainterp_sd, jitdriver_sd, loop, 0) opt.propagate_all_forward() gso = GuardStrengthenOpt(opt.dependency_graph.index_vars) - user_code = not jitdriver_sd.vectorize and warmstate.vectorize_user gso.propagate_all_forward(opt.loop, user_code) # connect all compile loop version fail descriptors to this version version.register_all_guards(loop.operations, opt.appended_arg_count) @@ -85,6 +87,36 @@ else: raise +def user_loop_bail_fast_path(loop, warmstate): + """ in a fast path over the trace loop: try to prevent vecopt + of spending time on a loop that will most probably fail """ + + resop_count = 0 # the count of operations minus debug_merge_points + vector_instr = 0 + at_least_one_array_access = True + for i,op 
in enumerate(loop.operations): + if op.getopnum() == rop.DEBUG_MERGE_POINT: + continue + + if op.vector >= 0 and not op.is_guard(): + vector_instr += 1 + + resop_count += 1 + + if op.is_primitive_array_access(): + at_least_one_array_access = True + + if not at_least_one_array_access: + return True + + if resop_count > warmstate.vec_length: + return True + + if float(vector_instr)/float(resop_count) <= warmstate.vec_ratio: + return True + + return False + def cmp_pack_lt(a,b): return a.left.getindex() < b.left.getindex() packsort = listsort.make_timsort_class(lt=cmp_pack_lt) diff --git a/rpython/jit/metainterp/pyjitpl.py b/rpython/jit/metainterp/pyjitpl.py --- a/rpython/jit/metainterp/pyjitpl.py +++ b/rpython/jit/metainterp/pyjitpl.py @@ -1123,7 +1123,7 @@ if self.metainterp.seen_loop_header_for_jdindex < 0: if not any_operation: - if jitdriver_sd.vectorize or jitdriver_sd.warmstate.vectorize_user: + if jitdriver_sd.vec or jitdriver_sd.warmstate.vec_all: self.metainterp.generate_guard(rop.GUARD_EARLY_EXIT) return if self.metainterp.portal_call_depth or not self.metainterp.get_procedure_token(greenboxes, True): diff --git a/rpython/jit/metainterp/test/support.py b/rpython/jit/metainterp/test/support.py --- a/rpython/jit/metainterp/test/support.py +++ b/rpython/jit/metainterp/test/support.py @@ -51,7 +51,7 @@ trace_limit = sys.maxint enable_opts = ALL_OPTS_DICT - vectorize = True + vec = True if kwds.pop('disable_optimizations', False): FakeWarmRunnerState.enable_opts = {} @@ -70,7 +70,7 @@ greenfield_info = None result_type = result_kind portal_runner_ptr = "???" 
- vectorize = False + vec = False stats = history.Stats() cpu = CPUClass(rtyper, stats, None, False) diff --git a/rpython/jit/metainterp/test/test_compile.py b/rpython/jit/metainterp/test/test_compile.py --- a/rpython/jit/metainterp/test/test_compile.py +++ b/rpython/jit/metainterp/test/test_compile.py @@ -66,7 +66,7 @@ index = 0 warmstate = FakeState() virtualizable_info = None - vectorize = False + vec = False def test_compile_loop(): cpu = FakeCPU() diff --git a/rpython/jit/metainterp/test/test_vectorize.py b/rpython/jit/metainterp/test/test_vectorize.py --- a/rpython/jit/metainterp/test/test_vectorize.py +++ b/rpython/jit/metainterp/test/test_vectorize.py @@ -24,7 +24,7 @@ policy=policy, CPUClass=self.CPUClass, type_system=self.type_system, - vectorize=1) + vec=True) @py.test.mark.parametrize('i',[3,4,5,6,7,8,9,50]) def test_vectorize_simple_load_arith_store_int_add_index(self,i): diff --git a/rpython/jit/metainterp/warmspot.py b/rpython/jit/metainterp/warmspot.py --- a/rpython/jit/metainterp/warmspot.py +++ b/rpython/jit/metainterp/warmspot.py @@ -32,7 +32,7 @@ # Bootstrapping def apply_jit(translator, backend_name="auto", inline=False, - vectorize=False, enable_opts=ALL_OPTS_NAMES, **kwds): + vec=False, enable_opts=ALL_OPTS_NAMES, **kwds): if 'CPUClass' not in kwds: from rpython.jit.backend.detect_cpu import getcpuclass kwds['CPUClass'] = getcpuclass(backend_name) @@ -47,7 +47,7 @@ **kwds) for jd in warmrunnerdesc.jitdrivers_sd: jd.warmstate.set_param_inlining(inline) - jd.warmstate.set_param_vectorize(vectorize) + jd.warmstate.set_param_vec(vec) jd.warmstate.set_param_enable_opts(enable_opts) warmrunnerdesc.finish() translator.warmrunnerdesc = warmrunnerdesc # for later debugging @@ -68,12 +68,11 @@ return jittify_and_run(interp, graph, args, backendopt=backendopt, **kwds) def jittify_and_run(interp, graph, args, repeat=1, graph_and_interp_only=False, - backendopt=False, trace_limit=sys.maxint, - inline=False, loop_longevity=0, retrace_limit=5, - 
function_threshold=4, + backendopt=False, trace_limit=sys.maxint, inline=False, + loop_longevity=0, retrace_limit=5, function_threshold=4, enable_opts=ALL_OPTS_NAMES, max_retrace_guards=15, - max_unroll_recursion=7, vectorize=0, vectorize_user=0, - vec_cost=0, **kwds): + max_unroll_recursion=7, vec=0, vec_params='0:0:50:0.6', + **kwds): from rpython.config.config import ConfigError translator = interp.typer.annotator.translator try: @@ -96,9 +95,8 @@ jd.warmstate.set_param_max_retrace_guards(max_retrace_guards) jd.warmstate.set_param_enable_opts(enable_opts) jd.warmstate.set_param_max_unroll_recursion(max_unroll_recursion) - jd.warmstate.set_param_vectorize(vectorize) - jd.warmstate.set_param_vectorize_user(vectorize_user) - jd.warmstate.set_param_vec_cost(vec_cost) + jd.warmstate.set_param_vec(vec) + jd.warmstate.set_param_vec_params(vec_params) warmrunnerdesc.finish() if graph_and_interp_only: return interp, graph @@ -398,7 +396,7 @@ graph.func._dont_inline_ = True graph.func._jit_unroll_safe_ = True jd.jitdriver = block.operations[pos].args[1].value - jd.vectorize = jd.jitdriver.vectorize + jd.vec = jd.jitdriver.vec jd.portal_runner_ptr = "<not set so far>" jd.result_type = history.getkind(jd.portal_graph.getreturnvar() .concretetype)[0] diff --git a/rpython/jit/metainterp/warmstate.py b/rpython/jit/metainterp/warmstate.py --- a/rpython/jit/metainterp/warmstate.py +++ b/rpython/jit/metainterp/warmstate.py @@ -300,14 +300,21 @@ if self.warmrunnerdesc.memory_manager: self.warmrunnerdesc.memory_manager.max_unroll_recursion = value - def set_param_vectorize(self, value): - self.vectorize = bool(value) + def set_param_vec(self, value): + self.vec = bool(value) - def set_param_vectorize_user(self, value): - self.vectorize_user = bool(value) - - def set_param_vec_cost(self, value): - self.vec_cost = bool(value) + def set_param_vec_params(self, value): + values = value.split(":") + self.vec_all = bool(values[0]) + self.vec_cost = 0 + if len(values) > 1: + self.vec_cost 
= int(values[1]) + self.vec_length = 50 + if len(values) > 2: + self.vec_length = int(values[2]) + self.vec_ratio = 0.60 + if len(values) > 3: + self.vec_ratio = float(values[3]) def disable_noninlinable_function(self, greenkey): cell = self.JitCell.ensure_jit_cell_at_key(greenkey) diff --git a/rpython/rlib/jit.py b/rpython/rlib/jit.py --- a/rpython/rlib/jit.py +++ b/rpython/rlib/jit.py @@ -553,9 +553,14 @@ 'enable_opts': 'INTERNAL USE ONLY (MAY NOT WORK OR LEAD TO CRASHES): ' 'optimizations to enable, or all = %s' % ENABLE_ALL_OPTS, 'max_unroll_recursion': 'how many levels deep to unroll a recursive function', - 'vectorize': 'turn on the vectorization optimization (vecopt). requires sse4.1', - 'vectorize_user': 'turn on the vecopt for the python user program. requires sse4.1', - 'vec_cost': 'threshold which traces to vectorize.', + 'vec': 'turn on the vectorization optimization (vecopt). requires sse4.1', + 'vec_params': 'parameters to the optimization separated by colons. <all>[:<cost>[:<length>[:<ratio>]]]. ' + 'all = 1: try to vectorize trace loops that occur outside of the numpy library. ' + 'cost = 0: threshold for which traces to bail. 0 means the costs ' + 'balance the unpacking, if below the vectorizer bails out. ' + 'length = 50: the amount of instructions allowed in "all" traces. 
' + 'ratio = 0.60: the number statements that have vector equivalents divided ' + 'by the total number of trace instructions.', } PARAMETERS = {'threshold': 1039, # just above 1024, prime @@ -571,9 +576,8 @@ 'disable_unrolling': 200, 'enable_opts': 'all', 'max_unroll_recursion': 7, - 'vectorize': 0, - 'vectorize_user': 0, - 'vec_cost': 0, + 'vec': 0, + 'vec_params': '0:0:50:0.60', } unroll_parameters = unrolling_iterable(PARAMETERS.items()) @@ -636,7 +640,7 @@ self.can_never_inline = can_never_inline self.should_unroll_one_iteration = should_unroll_one_iteration self.check_untranslated = check_untranslated - self.vectorize = vectorize + self.vec = vectorize def _freeze_(self): return True _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit