Author: Richard Plangger <r...@pasra.at> Branch: vecopt2 Changeset: r77061:3b35c7d89697 Date: 2015-03-05 17:45 +0100 http://bitbucket.org/pypy/pypy/changeset/3b35c7d89697/
Log: started to unroll a trace in the optimizer. work in progress diff --git a/rpython/jit/metainterp/optimizeopt/__init__.py b/rpython/jit/metainterp/optimizeopt/__init__.py --- a/rpython/jit/metainterp/optimizeopt/__init__.py +++ b/rpython/jit/metainterp/optimizeopt/__init__.py @@ -8,6 +8,7 @@ from rpython.jit.metainterp.optimizeopt.simplify import OptSimplify from rpython.jit.metainterp.optimizeopt.pure import OptPure from rpython.jit.metainterp.optimizeopt.earlyforce import OptEarlyForce +from rpython.jit.metainterp.optimizeopt.unfold import optimize_unfold from rpython.rlib.jit import PARAMETERS, ENABLE_ALL_OPTS from rpython.rlib.unroll import unrolling_iterable from rpython.rlib.debug import debug_start, debug_stop, debug_print @@ -20,7 +21,8 @@ ('earlyforce', OptEarlyForce), ('pure', OptPure), ('heap', OptHeap), - ('unroll', None)] + ('unroll', None), + ('unfold', None)] # no direct instantiation of unroll unroll_all_opts = unrolling_iterable(ALL_OPTS) @@ -34,6 +36,7 @@ def build_opt_chain(metainterp_sd, enable_opts): optimizations = [] unroll = 'unroll' in enable_opts # 'enable_opts' is normally a dict + unfold = 'unfold' in enable_opts for name, opt in unroll_all_opts: if name in enable_opts: if opt is not None: @@ -43,9 +46,10 @@ if ('rewrite' not in enable_opts or 'virtualize' not in enable_opts or 'heap' not in enable_opts or 'unroll' not in enable_opts or 'pure' not in enable_opts): - optimizations.append(OptSimplify(unroll)) + if 'unfold' not in enable_opts: # TODO + optimizations.append(OptSimplify(unroll)) - return optimizations, unroll + return optimizations, unroll, unfold def optimize_trace(metainterp_sd, jitdriver_sd, loop, enable_opts, inline_short_preamble=True, start_state=None, @@ -57,8 +61,15 @@ try: loop.logops = metainterp_sd.logger_noopt.log_loop(loop.inputargs, loop.operations) - optimizations, unroll = build_opt_chain(metainterp_sd, enable_opts) - if unroll: + optimizations, unroll, unfold = build_opt_chain(metainterp_sd, enable_opts) 
+ if unfold: + return optimize_unfold(metainterp_sd, + jitdriver_sd, + loop, + optimizations, + start_state, + export_state) + elif unroll: return optimize_unroll(metainterp_sd, jitdriver_sd, loop, optimizations, inline_short_preamble, start_state, diff --git a/rpython/jit/metainterp/optimizeopt/dependency.py b/rpython/jit/metainterp/optimizeopt/dependency.py --- a/rpython/jit/metainterp/optimizeopt/dependency.py +++ b/rpython/jit/metainterp/optimizeopt/dependency.py @@ -29,6 +29,9 @@ """ This is basically building the definition-use chain and saving this information in a graph structure. This is the same as calculating the reaching definitions and the 'looking back' whenever it is used. + + Write After Read, Write After Write dependencies are not possible, + the operations are in SSA form """ defining_indices = {} @@ -38,9 +41,12 @@ for arg in op.getarglist(): defining_indices[arg] = 0 + # TODO what about a JUMP operation? it often has many parameters (10+) and uses + # nearly every definition in the trace (for loops). Maybe we can skip this operation + if op.result is not None: - # overwrites redefinition. This is not a problem - # if the trace is in SSA form. 
+ # the trace is always in SSA form, thus it is neither possible to have a WAR + nor a WAW dependency defining_indices[op.result] = i for arg in op.getarglist(): diff --git a/rpython/jit/metainterp/optimizeopt/test/test_dependency.py b/rpython/jit/metainterp/optimizeopt/test/test_dependency.py --- a/rpython/jit/metainterp/optimizeopt/test/test_dependency.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_dependency.py @@ -2,11 +2,13 @@ from rpython.rlib.objectmodel import instantiate from rpython.jit.metainterp.optimizeopt.test.test_util import ( LLtypeMixin, BaseTest, FakeMetaInterpStaticData, convert_old_style_to_targets) -from rpython.jit.metainterp.history import TargetToken, JitCellToken +from rpython.jit.metainterp.history import TargetToken, JitCellToken, TreeLoop from rpython.jit.metainterp.optimizeopt import optimize_trace import rpython.jit.metainterp.optimizeopt.optimizer as optimizeopt import rpython.jit.metainterp.optimizeopt.virtualize as virtualize from rpython.jit.metainterp.optimizeopt.dependency import DependencyGraph +from rpython.jit.metainterp.optimizeopt.unroll import Inliner +from rpython.jit.metainterp.optimizeopt.unfold import OptUnfold from rpython.jit.metainterp.optimize import InvalidLoop from rpython.jit.metainterp.history import ConstInt, BoxInt, get_const_ptr_for_string from rpython.jit.metainterp import executor, compile, resume @@ -15,20 +17,25 @@ class DepTestHelper(BaseTest): - enable_opts = "vectorize" + enable_opts = "intbounds:rewrite:virtualize:string:earlyforce:pure:heap:unfold" def build_dependency(self, ops): + loop = self.parse_loop(ops) + return DependencyGraph(None, loop) + + def parse_loop(self, ops): loop = self.parse(ops, postprocess=self.postprocess) token = JitCellToken() loop.operations = [ResOperation(rop.LABEL, loop.inputargs, None, descr=TargetToken(token))] + loop.operations if loop.operations[-1].getopnum() == rop.JUMP: loop.operations[-1].setdescr(token) - #self._do_optimize_loop(loop, call_pure_results, 
export_state=False) - #print '\n'.join([str(o) for o in loop.operations]) - #self.assert_equal(loop, expected) + return loop - return DependencyGraph(None, loop) + def assert_unfold_loop(self, loop, unroll_factor, unfolded_loop, call_pure_results=None): + OptUnfold.force_unroll_factor = unroll_factor + optloop = self._do_optimize_loop(loop, call_pure_results, export_state=True) + self.assert_equal(optloop, unfolded_loop) def assert_def_use(self, graph, from_instr_index, to_instr_index): assert graph.instr_dependency(from_instr_index, @@ -62,5 +69,37 @@ self.assert_def_use(dep_graph, 1, 2) self.assert_def_use(dep_graph, 1, 3) + def test_unroll(self): + ops = """ + [p0,p1,p2,i0] + i1 = raw_load(p1, i0, descr=floatarraydescr) + i2 = raw_load(p2, i0, descr=floatarraydescr) + i3 = int_add(i1,i2) + raw_store(p0, i0, i3, descr=floatarraydescr) + i4 = int_add(i0, 1) + i5 = int_le(i4, 10) + guard_true(i5) [p0,p1,p2,i4] + jump(p0,p1,p2,i4) + """ + unfolded_ops = """ + [p0,p1,p2,i0] + i1 = raw_load(p1, i0, descr=floatarraydescr) + i2 = raw_load(p2, i0, descr=floatarraydescr) + i3 = int_add(i1,i2) + raw_store(p0, i0, i3, descr=floatarraydescr) + i4 = int_add(i0, 1) + i5 = int_le(i4, 10) + guard_true(i5) [p0,p1,p2,i4] + i10 = raw_load(p1, i4, descr=floatarraydescr) + i11 = raw_load(p2, i4, descr=floatarraydescr) + i12 = int_add(i10,i11) + raw_store(p0, i4, i12, descr=floatarraydescr) + i20 = int_add(i4, 1) + i21 = int_le(i20, 10) + guard_true(i21) [p0,p1,p2,i20] + jump(p0,p1,p2,i21) + """ + self.assert_unfold_loop(self.parse_loop(ops),4, self.parse_loop(unfolded_ops)) + class TestLLtype(BaseTestDependencyGraph, LLtypeMixin): pass diff --git a/rpython/jit/metainterp/optimizeopt/unfold.py b/rpython/jit/metainterp/optimizeopt/unfold.py new file mode 100644 --- /dev/null +++ b/rpython/jit/metainterp/optimizeopt/unfold.py @@ -0,0 +1,664 @@ +import sys + +from rpython.jit.metainterp.history import TargetToken, JitCellToken, Const +from rpython.jit.metainterp.inliner import Inliner 
+from rpython.jit.metainterp.optimize import InvalidLoop +from rpython.jit.metainterp.optimizeopt.generalize import KillHugeIntBounds +from rpython.jit.metainterp.optimizeopt.optimizer import Optimizer, Optimization +from rpython.jit.metainterp.optimizeopt.virtualstate import (VirtualStateConstructor, + ShortBoxes, BadVirtualState, VirtualStatesCantMatch) +from rpython.jit.metainterp.resoperation import rop, ResOperation, GuardResOp +from rpython.jit.metainterp.resume import Snapshot +from rpython.jit.metainterp import compile +from rpython.rlib.debug import debug_print, debug_start, debug_stop + + +def optimize_unfold(metainterp_sd, jitdriver_sd, loop, optimizations, start_state=None, + export_state=True): + opt = OptUnfold(metainterp_sd, jitdriver_sd, loop, optimizations) + return opt.propagate_all_forward(start_state, export_state) + + +class UnfoldOptimizer(Optimizer): + def setup(self): + self.importable_values = {} + self.emitting_dissabled = False + self.emitted_guards = 0 + + def ensure_imported(self, value): + if not self.emitting_dissabled and value in self.importable_values: + imp = self.importable_values[value] + del self.importable_values[value] + imp.import_value(value) + + def emit_operation(self, op): + if op.returns_bool_result(): + self.bool_boxes[self.getvalue(op.result)] = None + if self.emitting_dissabled: + return + if op.is_guard(): + self.emitted_guards += 1 # FIXME: can we use counter in self._emit_operation? + self._emit_operation(op) + + +class OptUnfold(Optimization): + """ In contrast to the loop unroll optimization this optimization + unrolls the loop many times instead of just peeling off one trace. 
+ """ + + inline_short_preamble = True + + # for testing purpose only + # TODO: hide it from rpython + _force_unroll_factor = -1 + + def __init__(self, metainterp_sd, jitdriver_sd, loop, optimizations): + self.optimizer = UnfoldOptimizer(metainterp_sd, jitdriver_sd, + loop, optimizations) + self.boxes_created_this_iteration = None + + def get_virtual_state(self, args): + modifier = VirtualStateConstructor(self.optimizer) + return modifier.get_virtual_state(args) + + def fix_snapshot(self, jump_args, snapshot): + if snapshot is None: + return None + snapshot_args = snapshot.boxes + new_snapshot_args = [] + for a in snapshot_args: + a = self.getvalue(a).get_key_box() + new_snapshot_args.append(a) + prev = self.fix_snapshot(jump_args, snapshot.prev) + return Snapshot(prev, new_snapshot_args) + + def _rename_arguments_ssa(rename_map, label_args, jump_args): + + for la,ja in zip(label_args, jump_args): + if la != ja: + rename_map[la] = ja + + return new_jump_args + + def propagate_all_forward(self, starting_state, export_state=True): + + unroll_factor = 2 + + self.optimizer.exporting_state = export_state + loop = self.optimizer.loop + self.optimizer.clear_newoperations() + + + label_op = loop.operations[0] + jump_op = loop.operations[-1] + operations = loop.operations[1:-1] + loop.operations = [] + + iterations = [[op.clone() for op in operations]] + label_op_args = label_op.getarglist() + + jump_op_args = jump_op.getarglist() + + rename_map = {} + for unroll_i in range(2, unroll_factor+1): + _rename_arguments_ssa(rename_map, label_op_args, jump_op_args) + iteration_ops = [] + for op in operations: + cop = op.clone() + iteration_ops.append(cop) + iterations.append(iteration_ops) + + loop.operations.append(label_op) + for iteration in iterations: + for op in iteration: + loop.operations.append(op) + loop.operations.append(jump_op) + + #start_label = loop.operations[0] + #if start_label.getopnum() == rop.LABEL: + # loop.operations = loop.operations[1:] + # # We need to 
emit the label op before import_state() as emitting it + # # will clear heap caches + # self.optimizer.send_extra_operation(start_label) + #else: + # start_label = None + + #patchguardop = None + #if len(loop.operations) > 1: + # patchguardop = loop.operations[-2] + # if patchguardop.getopnum() != rop.GUARD_FUTURE_CONDITION: + # patchguardop = None + + #jumpop = loop.operations[-1] + #if jumpop.getopnum() == rop.JUMP or jumpop.getopnum() == rop.LABEL: + # loop.operations = loop.operations[:-1] + #else: + # jumpop = None + + #self.import_state(start_label, starting_state) + #self.optimizer.propagate_all_forward(clear=False) + + #if not jumpop: + # return + + #cell_token = jumpop.getdescr() + #assert isinstance(cell_token, JitCellToken) + #stop_label = ResOperation(rop.LABEL, jumpop.getarglist(), None, TargetToken(cell_token)) + + #if jumpop.getopnum() == rop.JUMP: + # if self.jump_to_already_compiled_trace(jumpop, patchguardop): + # # Found a compiled trace to jump to + # if self.short: + # # Construct our short preamble + # assert start_label + # self.close_bridge(start_label) + # return + + # if start_label and self.jump_to_start_label(start_label, stop_label): + # # Initial label matches, jump to it + # jumpop = ResOperation(rop.JUMP, stop_label.getarglist(), None, + # descr=start_label.getdescr()) + # if self.short: + # # Construct our short preamble + # self.close_loop(start_label, jumpop, patchguardop) + # else: + # self.optimizer.send_extra_operation(jumpop) + # return + + # if cell_token.target_tokens: + # limit = self.optimizer.metainterp_sd.warmrunnerdesc.memory_manager.retrace_limit + # if cell_token.retraced_count < limit: + # cell_token.retraced_count += 1 + # debug_print('Retracing (%d/%d)' % (cell_token.retraced_count, limit)) + # else: + # debug_print("Retrace count reached, jumping to preamble") + # assert cell_token.target_tokens[0].virtual_state is None + # jumpop = jumpop.clone() + # jumpop.setdescr(cell_token.target_tokens[0]) + # 
self.optimizer.send_extra_operation(jumpop) + # return + + ## Found nothing to jump to, emit a label instead + + #if self.short: + # # Construct our short preamble + # assert start_label + # self.close_bridge(start_label) + + #self.optimizer.flush() + #if export_state: + # KillHugeIntBounds(self.optimizer).apply() + + #loop.operations = self.optimizer.get_newoperations() + #if export_state: + # final_state = self.export_state(stop_label) + #else: + # final_state = None + #loop.operations.append(stop_label) + #return final_state + return loop + + def jump_to_start_label(self, start_label, stop_label): + if not start_label or not stop_label: + return False + + stop_target = stop_label.getdescr() + start_target = start_label.getdescr() + assert isinstance(stop_target, TargetToken) + assert isinstance(start_target, TargetToken) + return stop_target.targeting_jitcell_token is start_target.targeting_jitcell_token + + + def export_state(self, targetop): + original_jump_args = targetop.getarglist() + jump_args = [self.getvalue(a).get_key_box() for a in original_jump_args] + + virtual_state = self.get_virtual_state(jump_args) + + values = [self.getvalue(arg) for arg in jump_args] + inputargs = virtual_state.make_inputargs(values, self.optimizer) + short_inputargs = virtual_state.make_inputargs(values, self.optimizer, keyboxes=True) + + if self.boxes_created_this_iteration is not None: + for box in self.inputargs: + self.boxes_created_this_iteration[box] = None + + short_boxes = ShortBoxes(self.optimizer, inputargs) + + self.optimizer.clear_newoperations() + for i in range(len(original_jump_args)): + srcbox = jump_args[i] + if values[i].is_virtual(): + srcbox = values[i].force_box(self.optimizer) + if original_jump_args[i] is not srcbox: + op = ResOperation(rop.SAME_AS, [srcbox], original_jump_args[i]) + self.optimizer.emit_operation(op) + inputarg_setup_ops = self.optimizer.get_newoperations() + + target_token = targetop.getdescr() + assert isinstance(target_token, 
TargetToken) + targetop.initarglist(inputargs) + target_token.virtual_state = virtual_state + target_token.short_preamble = [ResOperation(rop.LABEL, short_inputargs, None)] + + exported_values = {} + for box in inputargs: + exported_values[box] = self.optimizer.getvalue(box) + for op in short_boxes.operations(): + if op and op.result: + box = op.result + exported_values[box] = self.optimizer.getvalue(box) + + return ExportedState(short_boxes, inputarg_setup_ops, exported_values) + + def import_state(self, targetop, exported_state): + if not targetop: # Trace did not start with a label + self.inputargs = self.optimizer.loop.inputargs + self.short = None + self.initial_virtual_state = None + return + + self.inputargs = targetop.getarglist() + target_token = targetop.getdescr() + assert isinstance(target_token, TargetToken) + if not exported_state: + # No state exported, construct one without virtuals + self.short = None + virtual_state = self.get_virtual_state(self.inputargs) + self.initial_virtual_state = virtual_state + return + + self.short = target_token.short_preamble[:] + self.short_seen = {} + self.short_boxes = exported_state.short_boxes + self.initial_virtual_state = target_token.virtual_state + + for box in self.inputargs: + preamble_value = exported_state.exported_values[box] + value = self.optimizer.getvalue(box) + value.import_from(preamble_value, self.optimizer) + + # Setup the state of the new optimizer by emitting the + # short operations and discarding the result + self.optimizer.emitting_dissabled = True + for op in exported_state.inputarg_setup_ops: + self.optimizer.send_extra_operation(op) + + seen = {} + for op in self.short_boxes.operations(): + self.ensure_short_op_emitted(op, self.optimizer, seen) + if op and op.result: + preamble_value = exported_state.exported_values[op.result] + value = self.optimizer.getvalue(op.result) + if not value.is_virtual() and not value.is_constant(): + imp = ValueImporter(self, preamble_value, op) + 
self.optimizer.importable_values[value] = imp + newvalue = self.optimizer.getvalue(op.result) + newresult = newvalue.get_key_box() + # note that emitting here SAME_AS should not happen, but + # in case it does, we would prefer to be suboptimal in asm + # to a fatal RPython exception. + if newresult is not op.result and \ + not self.short_boxes.has_producer(newresult) and \ + not newvalue.is_constant(): + op = ResOperation(rop.SAME_AS, [op.result], newresult) + self.optimizer._newoperations.append(op) + #if self.optimizer.loop.logops: + # debug_print(' Falling back to add extra: ' + + # self.optimizer.loop.logops.repr_of_resop(op)) + + self.optimizer.flush() + self.optimizer.emitting_dissabled = False + + def close_bridge(self, start_label): + inputargs = self.inputargs + short_jumpargs = inputargs[:] + + # We don't need to inline the short preamble we are creating as we are connecting + # the bridge to a different trace with a different short preamble + self.short_inliner = None + + newoperations = self.optimizer.get_newoperations() + self.boxes_created_this_iteration = {} + i = 0 + while i < len(newoperations): + self._import_op(newoperations[i], inputargs, short_jumpargs, []) + i += 1 + newoperations = self.optimizer.get_newoperations() + self.short.append(ResOperation(rop.JUMP, short_jumpargs, None, descr=start_label.getdescr())) + self.finalize_short_preamble(start_label) + + def close_loop(self, start_label, jumpop, patchguardop): + virtual_state = self.initial_virtual_state + short_inputargs = self.short[0].getarglist() + inputargs = self.inputargs + short_jumpargs = inputargs[:] + + # Construct jumpargs from the virtual state + original_jumpargs = jumpop.getarglist()[:] + values = [self.getvalue(arg) for arg in jumpop.getarglist()] + try: + jumpargs = virtual_state.make_inputargs(values, self.optimizer) + except BadVirtualState: + raise InvalidLoop('The state of the optimizer at the end of ' + + 'peeled loop is inconsistent with the ' + + 'VirtualState at the 
beginning of the peeled ' + + 'loop') + jumpop.initarglist(jumpargs) + + # Inline the short preamble at the end of the loop + jmp_to_short_args = virtual_state.make_inputargs(values, + self.optimizer, + keyboxes=True) + assert len(short_inputargs) == len(jmp_to_short_args) + args = {} + for i in range(len(short_inputargs)): + if short_inputargs[i] in args: + if args[short_inputargs[i]] != jmp_to_short_args[i]: + raise InvalidLoop('The short preamble wants the ' + + 'same box passed to multiple of its ' + + 'inputargs, but the jump at the ' + + 'end of this bridge does not do that.') + + args[short_inputargs[i]] = jmp_to_short_args[i] + self.short_inliner = Inliner(short_inputargs, jmp_to_short_args) + self._inline_short_preamble(self.short, self.short_inliner, + patchguardop, self.short_boxes.assumed_classes) + + # Import boxes produced in the preamble but used in the loop + newoperations = self.optimizer.get_newoperations() + self.boxes_created_this_iteration = {} + i = j = 0 + while i < len(newoperations) or j < len(jumpargs): + if i == len(newoperations): + while j < len(jumpargs): + a = jumpargs[j] + #if self.optimizer.loop.logops: + # debug_print('J: ' + self.optimizer.loop.logops.repr_of_arg(a)) + self.import_box(a, inputargs, short_jumpargs, jumpargs) + j += 1 + else: + self._import_op(newoperations[i], inputargs, short_jumpargs, jumpargs) + i += 1 + newoperations = self.optimizer.get_newoperations() + + jumpop.initarglist(jumpargs) + self.optimizer.send_extra_operation(jumpop) + self.short.append(ResOperation(rop.JUMP, short_jumpargs, None, descr=jumpop.getdescr())) + + # Verify that the virtual state at the end of the loop is one + # that is compatible with the virtual state at the start of the loop + final_virtual_state = self.get_virtual_state(original_jumpargs) + #debug_start('jit-log-virtualstate') + #virtual_state.debug_print('Closed loop with ') + bad = {} + if not virtual_state.generalization_of(final_virtual_state, bad, + cpu=self.optimizer.cpu): + 
# We ended up with a virtual state that is not compatible + # and we are thus unable to jump to the start of the loop + #final_virtual_state.debug_print("Bad virtual state at end of loop, ", + # bad) + #debug_stop('jit-log-virtualstate') + raise InvalidLoop('The virtual state at the end of the peeled ' + + 'loop is not compatible with the virtual ' + + 'state at the start of the loop which makes ' + + 'it impossible to close the loop') + + #debug_stop('jit-log-virtualstate') + + maxguards = self.optimizer.metainterp_sd.warmrunnerdesc.memory_manager.max_retrace_guards + if self.optimizer.emitted_guards > maxguards: + target_token = jumpop.getdescr() + assert isinstance(target_token, TargetToken) + target_token.targeting_jitcell_token.retraced_count = sys.maxint + + self.finalize_short_preamble(start_label) + + def finalize_short_preamble(self, start_label): + short = self.short + assert short[-1].getopnum() == rop.JUMP + target_token = start_label.getdescr() + assert isinstance(target_token, TargetToken) + + # Turn guards into conditional jumps to the preamble + for i in range(len(short)): + op = short[i] + if op.is_guard(): + op = op.clone() + op.setfailargs(None) + op.setdescr(None) # will be set to a proper descr when the preamble is used + short[i] = op + + # Clone ops and boxes to get private versions and + short_inputargs = short[0].getarglist() + boxmap = {} + newargs = [None] * len(short_inputargs) + for i in range(len(short_inputargs)): + a = short_inputargs[i] + if a in boxmap: + newargs[i] = boxmap[a] + else: + newargs[i] = a.clonebox() + boxmap[a] = newargs[i] + inliner = Inliner(short_inputargs, newargs) + target_token.assumed_classes = {} + for i in range(len(short)): + op = short[i] + newop = inliner.inline_op(op) + if op.result and op.result in self.short_boxes.assumed_classes: + target_token.assumed_classes[newop.result] = self.short_boxes.assumed_classes[op.result] + short[i] = newop + + # Forget the values to allow them to be freed + for box in 
short[0].getarglist(): + box.forget_value() + for op in short: + if op.result: + op.result.forget_value() + target_token.short_preamble = self.short + + def ensure_short_op_emitted(self, op, optimizer, seen): + if op is None: + return + if op.result is not None and op.result in seen: + return + for a in op.getarglist(): + if not isinstance(a, Const) and a not in seen: + self.ensure_short_op_emitted(self.short_boxes.producer(a), optimizer, + seen) + + #if self.optimizer.loop.logops: + # debug_print(' Emitting short op: ' + + # self.optimizer.loop.logops.repr_of_resop(op)) + + optimizer.send_extra_operation(op) + seen[op.result] = None + if op.is_ovf(): + guard = ResOperation(rop.GUARD_NO_OVERFLOW, [], None) + optimizer.send_extra_operation(guard) + + def add_op_to_short(self, op, emit=True, guards_needed=False): + if op is None: + return None + if op.result is not None and op.result in self.short_seen: + if emit and self.short_inliner: + return self.short_inliner.inline_arg(op.result) + else: + return None + + for a in op.getarglist(): + if not isinstance(a, Const) and a not in self.short_seen: + self.add_op_to_short(self.short_boxes.producer(a), emit, guards_needed) + if op.is_guard(): + op.setdescr(None) # will be set to a proper descr when the preamble is used + + if guards_needed and self.short_boxes.has_producer(op.result): + value_guards = self.getvalue(op.result).make_guards(op.result) + else: + value_guards = [] + + self.short.append(op) + self.short_seen[op.result] = None + if emit and self.short_inliner: + newop = self.short_inliner.inline_op(op) + self.optimizer.send_extra_operation(newop) + else: + newop = None + + if op.is_ovf(): + # FIXME: ensure that GUARD_OVERFLOW:ed ops not end up here + guard = ResOperation(rop.GUARD_NO_OVERFLOW, [], None) + self.add_op_to_short(guard, emit, guards_needed) + for guard in value_guards: + self.add_op_to_short(guard, emit, guards_needed) + + if newop: + return newop.result + return None + + def import_box(self, box, 
inputargs, short_jumpargs, jumpargs): + if isinstance(box, Const) or box in inputargs: + return + if box in self.boxes_created_this_iteration: + return + + short_op = self.short_boxes.producer(box) + newresult = self.add_op_to_short(short_op) + + short_jumpargs.append(short_op.result) + inputargs.append(box) + box = newresult + if box in self.optimizer.values: + box = self.optimizer.values[box].force_box(self.optimizer) + jumpargs.append(box) + + + def _import_op(self, op, inputargs, short_jumpargs, jumpargs): + self.boxes_created_this_iteration[op.result] = None + args = op.getarglist() + if op.is_guard(): + args = args + op.getfailargs() + + for a in args: + self.import_box(a, inputargs, short_jumpargs, jumpargs) + + def jump_to_already_compiled_trace(self, jumpop, patchguardop): + jumpop = jumpop.clone() + assert jumpop.getopnum() == rop.JUMP + cell_token = jumpop.getdescr() + + assert isinstance(cell_token, JitCellToken) + if not cell_token.target_tokens: + return False + + if not self.inline_short_preamble: + assert cell_token.target_tokens[0].virtual_state is None + jumpop.setdescr(cell_token.target_tokens[0]) + self.optimizer.send_extra_operation(jumpop) + return True + + args = jumpop.getarglist() + virtual_state = self.get_virtual_state(args) + values = [self.getvalue(arg) + for arg in jumpop.getarglist()] + debug_start('jit-log-virtualstate') + virtual_state.debug_print("Looking for ", metainterp_sd=self.optimizer.metainterp_sd) + + for target in cell_token.target_tokens: + if not target.virtual_state: + continue + extra_guards = [] + + try: + cpu = self.optimizer.cpu + state = target.virtual_state.generate_guards(virtual_state, + values, + cpu) + + extra_guards = state.extra_guards + if extra_guards: + debugmsg = 'Guarded to match ' + else: + debugmsg = 'Matched ' + except VirtualStatesCantMatch, e: + debugmsg = 'Did not match:\n%s\n' % (e.msg, ) + target.virtual_state.debug_print(debugmsg, e.state.bad, metainterp_sd=self.optimizer.metainterp_sd) + 
continue + + assert patchguardop is not None or (extra_guards == [] and len(target.short_preamble) == 1) + + target.virtual_state.debug_print(debugmsg, {}) + + debug_stop('jit-log-virtualstate') + + args = target.virtual_state.make_inputargs(values, self.optimizer, + keyboxes=True) + short_inputargs = target.short_preamble[0].getarglist() + inliner = Inliner(short_inputargs, args) + + for guard in extra_guards: + if guard.is_guard(): + assert isinstance(patchguardop, GuardResOp) + assert isinstance(guard, GuardResOp) + guard.rd_snapshot = patchguardop.rd_snapshot + guard.rd_frame_info_list = patchguardop.rd_frame_info_list + guard.setdescr(compile.ResumeAtPositionDescr()) + self.optimizer.send_extra_operation(guard) + + try: + # NB: the short_preamble ends with a jump + self._inline_short_preamble(target.short_preamble, inliner, + patchguardop, + target.assumed_classes) + except InvalidLoop: + #debug_print("Inlining failed unexpectedly", + # "jumping to preamble instead") + assert cell_token.target_tokens[0].virtual_state is None + jumpop.setdescr(cell_token.target_tokens[0]) + self.optimizer.send_extra_operation(jumpop) + return True + debug_stop('jit-log-virtualstate') + return False + + def _inline_short_preamble(self, short_preamble, inliner, patchguardop, + assumed_classes): + i = 1 + # XXX this is intentional :-(. 
short_preamble can change during the + # loop in some cases + while i < len(short_preamble): + shop = short_preamble[i] + newop = inliner.inline_op(shop) + if newop.is_guard(): + if not patchguardop: + raise InvalidLoop("would like to have short preamble, but it has a guard and there's no guard_future_condition") + assert isinstance(newop, GuardResOp) + assert isinstance(patchguardop, GuardResOp) + newop.rd_snapshot = patchguardop.rd_snapshot + newop.rd_frame_info_list = patchguardop.rd_frame_info_list + newop.setdescr(compile.ResumeAtPositionDescr()) + self.optimizer.send_extra_operation(newop) + if shop.result in assumed_classes: + classbox = self.getvalue(newop.result).get_constant_class(self.optimizer.cpu) + if not classbox or not classbox.same_constant(assumed_classes[shop.result]): + raise InvalidLoop('The class of an opaque pointer before the jump ' + + 'does not mach the class ' + + 'it has at the start of the target loop') + i += 1 + + +class ValueImporter(object): + def __init__(self, unroll, value, op): + self.unroll = unroll + self.preamble_value = value + self.op = op + + def import_value(self, value): + value.import_from(self.preamble_value, self.unroll.optimizer) + self.unroll.add_op_to_short(self.op, False, True) + + +class ExportedState(object): + def __init__(self, short_boxes, inputarg_setup_ops, exported_values): + self.short_boxes = short_boxes + self.inputarg_setup_ops = inputarg_setup_ops + self.exported_values = exported_values diff --git a/rpython/rlib/jit.py b/rpython/rlib/jit.py --- a/rpython/rlib/jit.py +++ b/rpython/rlib/jit.py @@ -448,7 +448,7 @@ """Inconsistency in the JIT hints.""" ENABLE_ALL_OPTS = ( - 'intbounds:rewrite:virtualize:string:earlyforce:pure:heap:unroll') + 'intbounds:rewrite:virtualize:string:earlyforce:pure:heap:unroll:unfold') PARAMETER_DOCS = { 'threshold': 'number of times a loop has to run for it to become hot', _______________________________________________ pypy-commit mailing list pypy-commit@python.org 
https://mail.python.org/mailman/listinfo/pypy-commit