Author: Richard Plangger <r...@pasra.at> Branch: vecopt Changeset: r78728:948a227eab7f Date: 2015-07-31 16:01 +0200 http://bitbucket.org/pypy/pypy/changeset/948a227eab7f/
Log: That was a nasty problem. Entering the vecopt trace through the preamble only worked for non-accum/expanded traces; otherwise the arguments would not match. The loop now has an original label, followed by the invariant operations, leading to a label that can carry expanded values. diff --git a/pypy/module/micronumpy/loop.py b/pypy/module/micronumpy/loop.py --- a/pypy/module/micronumpy/loop.py +++ b/pypy/module/micronumpy/loop.py @@ -85,7 +85,7 @@ call_many_to_one_driver = jit.JitDriver( name='numpy_call_many_to_one', greens=['shapelen', 'nin', 'func', 'res_dtype'], - reds='auto', vectorize=True) + reds='auto') def call_many_to_one(space, shape, func, res_dtype, in_args, out): # out must hav been built. func needs no calc_type, is usually an @@ -119,7 +119,7 @@ call_many_to_many_driver = jit.JitDriver( name='numpy_call_many_to_many', greens=['shapelen', 'nin', 'nout', 'func', 'res_dtype'], - reds='auto', vectorize=True) + reds='auto') def call_many_to_many(space, shape, func, res_dtype, in_args, out_args): # out must hav been built. 
func needs no calc_type, is usually an @@ -228,7 +228,7 @@ reduce_cum_driver = jit.JitDriver( name='numpy_reduce_cum_driver', greens=['shapelen', 'func', 'dtype', 'out_dtype'], - reds='auto', vectorize=True) + reds='auto') def compute_reduce_cumulative(space, obj, out, calc_dtype, func, identity): obj_iter, obj_state = obj.create_iter() @@ -356,7 +356,7 @@ def _new_argmin_argmax(op_name): arg_driver = jit.JitDriver(name='numpy_' + op_name, greens = ['shapelen', 'dtype'], - reds = 'auto', vectorize=True) + reds = 'auto') def argmin_argmax(arr): result = 0 @@ -536,7 +536,7 @@ flatiter_getitem_driver = jit.JitDriver(name = 'numpy_flatiter_getitem', greens = ['dtype'], - reds = 'auto') + reds = 'auto', vectorize=True) def flatiter_getitem(res, base_iter, base_state, step): ri, rs = res.create_iter() @@ -570,7 +570,7 @@ fromstring_driver = jit.JitDriver(name = 'numpy_fromstring', greens = ['itemsize', 'dtype'], - reds = 'auto', vectorize=True) + reds = 'auto') def fromstring_loop(space, a, dtype, itemsize, s): i = 0 @@ -604,7 +604,7 @@ getitem_int_driver = jit.JitDriver(name = 'numpy_getitem_int', greens = ['shapelen', 'indexlen', 'prefixlen', 'dtype'], - reds = 'auto', vectorize=True) + reds = 'auto') def getitem_array_int(space, arr, res, iter_shape, indexes_w, prefix_w): shapelen = len(iter_shape) @@ -632,7 +632,7 @@ setitem_int_driver = jit.JitDriver(name = 'numpy_setitem_int', greens = ['shapelen', 'indexlen', 'prefixlen', 'dtype'], - reds = 'auto', vectorize=True) + reds = 'auto') def setitem_array_int(space, arr, iter_shape, indexes_w, val_arr, prefix_w): @@ -762,7 +762,7 @@ diagonal_simple_driver = jit.JitDriver(name='numpy_diagonal_simple_driver', greens = ['axis1', 'axis2'], - reds = 'auto', vectorize=True) + reds = 'auto') def diagonal_simple(space, arr, out, offset, axis1, axis2, size): out_iter, out_state = out.create_iter() @@ -806,7 +806,7 @@ def _new_binsearch(side, op_name): binsearch_driver = jit.JitDriver(name='numpy_binsearch_' + side, 
greens=['dtype'], - reds='auto', vectorize=True) + reds='auto') def binsearch(space, arr, key, ret): assert len(arr.get_shape()) == 1 diff --git a/pypy/module/micronumpy/test/test_zjit.py b/pypy/module/micronumpy/test/test_zjit.py --- a/pypy/module/micronumpy/test/test_zjit.py +++ b/pypy/module/micronumpy/test/test_zjit.py @@ -466,8 +466,6 @@ def test_cumsum(self): result = self.run("cumsum") assert result == 15 - # not vectorizable, has one back edge - self.check_vectorized(1, 0) def define_axissum(): return """ @@ -803,7 +801,7 @@ def test_flat_getitem(self): result = self.run("flat_getitem") assert result == 10.0 - self.check_vectorized(0,0) + self.check_vectorized(1,1) def define_flat_setitem(): return ''' diff --git a/rpython/jit/metainterp/compile.py b/rpython/jit/metainterp/compile.py --- a/rpython/jit/metainterp/compile.py +++ b/rpython/jit/metainterp/compile.py @@ -149,23 +149,14 @@ [inliner.inline_op(h_ops[i]) for i in range(start, len(h_ops))] + \ [ResOperation(rop.JUMP, [inliner.inline_arg(a) for a in jumpargs], None, descr=jitcell_token)] - target_token = part.operations[0].getdescr() - assert isinstance(target_token, TargetToken) - all_target_tokens.append(target_token) - inputargs = jumpargs - jumpargs = part.operations[-1].getarglist() - try: optimize_trace(metainterp_sd, jitdriver_sd, part, warmstate, start_state=start_state, export_state=False, try_disabling_unroll=try_disabling_unroll) except InvalidLoop: return None - - loop.operations = loop.operations[:-1] + part.operations - loop.versions = part.versions - if part.quasi_immutable_deps: - loop.quasi_immutable_deps.update(part.quasi_immutable_deps) + # + loop.append_loop(part, all_target_tokens) assert part.operations[-1].getopnum() != rop.LABEL if loop.versions is not None: @@ -197,7 +188,6 @@ metainterp_sd = metainterp.staticdata cpu = metainterp_sd.cpu if loop.versions is not None: - token = jitcell_token for version in loop.versions: if len(version.faildescrs) == 0: continue diff --git 
a/rpython/jit/metainterp/history.py b/rpython/jit/metainterp/history.py --- a/rpython/jit/metainterp/history.py +++ b/rpython/jit/metainterp/history.py @@ -761,7 +761,10 @@ def register_all_guards(self, opt_ops, invariant_arg_count=0): from rpython.jit.metainterp.compile import CompileLoopVersionDescr + pass_by = 0 idx = index_of_first(rop.LABEL, opt_ops) + if opt_ops[idx].getdescr() is not opt_ops[-1].getdescr(): + idx = index_of_first(rop.LABEL, opt_ops, pass_by=1) assert idx >= 0 version_failargs = opt_ops[idx].getarglist() if invariant_arg_count > 0: @@ -799,6 +802,7 @@ op.rd_snapshot = None def update_token(self, jitcell_token): + # this is only invoked for versioned loops! label = self.operations[self.label_pos] jump = self.operations[-1] # @@ -849,6 +853,29 @@ insns[opname] = insns.get(opname, 0) + 1 return insns + def append_loop(self, loop, all_target_tokens): + # append e.g. the peeled loop to this loop! + label, jump = loop.operations[0], loop.operations[-1] + assert label.getopnum() == rop.LABEL + assert jump.getopnum() == rop.JUMP + target_token = None + i = 0 + # adds all target token until the one is found that jumps from the + # last instruction to the label + while target_token is not jump.getdescr(): + # there is another label + op = loop.operations[i] + if op.getopnum() == rop.LABEL: + target_token = op.getdescr() + assert isinstance(target_token, TargetToken) + all_target_tokens.append(target_token) + i += 1 + # + self.operations = self.operations[:-1] + loop.operations + self.versions = loop.versions + if loop.quasi_immutable_deps: + self.quasi_immutable_deps.update(loop.quasi_immutable_deps) + def get_operations(self): return self.operations diff --git a/rpython/jit/metainterp/optimizeopt/guard.py b/rpython/jit/metainterp/optimizeopt/guard.py --- a/rpython/jit/metainterp/optimizeopt/guard.py +++ b/rpython/jit/metainterp/optimizeopt/guard.py @@ -119,8 +119,8 @@ descr = myop.getdescr() descr.copy_all_attributes_from(other.op.getdescr()) 
myop.rd_frame_info_list = otherop.rd_frame_info_list + myop.setfailargs(otherop.getfailargs()) myop.rd_snapshot = otherop.rd_snapshot - myop.setfailargs(otherop.getfailargs()) def emit_varops(self, opt, var, old_arg): assert isinstance(var, IndexVar) diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py b/rpython/jit/metainterp/optimizeopt/schedule.py --- a/rpython/jit/metainterp/optimizeopt/schedule.py +++ b/rpython/jit/metainterp/optimizeopt/schedule.py @@ -1,6 +1,6 @@ from rpython.jit.metainterp.history import (VECTOR,FLOAT,INT,ConstInt,BoxVector, - BoxFloat,BoxInt,ConstFloat) + BoxFloat,BoxInt,ConstFloat,TargetToken) from rpython.jit.metainterp.resoperation import (rop, ResOperation, GuardResOp) from rpython.jit.metainterp.optimizeopt.dependency import (DependencyGraph, MemoryRef, Node, IndexVar) @@ -839,24 +839,32 @@ assert off < vector.getcount() self.box_to_vbox[box] = (off, vector) - def prepend_invariant_operations(self, oplist): + def prepend_invariant_operations(self, oplist, orig_label_args): if len(self.invariant_oplist) > 0: label = oplist[0] assert label.getopnum() == rop.LABEL + # jump = oplist[-1] assert jump.getopnum() == rop.JUMP - - label_args = label.getarglist() + # + label_args = label.getarglist()[:] jump_args = jump.getarglist() for var in self.invariant_vector_vars: label_args.append(var) jump_args.append(var) - - oplist[0] = label.copy_and_change(label.getopnum(), label_args, None, label.getdescr()) - oplist[-1] = jump.copy_and_change(jump.getopnum(), jump_args, None, jump.getdescr()) - - return self.invariant_oplist + oplist - + # + # in case of any invariant_vector_vars, the label is restored + # and the invariant operations are added between the original label + # and the new label + descr = label.getdescr() + assert isinstance(descr, TargetToken) + token = TargetToken(descr.targeting_jitcell_token) + oplist[0] = label.copy_and_change(label.getopnum(), label_args, None, token) + oplist[-1] = jump.copy_and_change(jump.getopnum(), 
jump_args, None, token) + # + return [ResOperation(rop.LABEL, orig_label_args, None, descr)] + \ + self.invariant_oplist + oplist + # return oplist class Pack(object): diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py b/rpython/jit/metainterp/optimizeopt/vectorize.py --- a/rpython/jit/metainterp/optimizeopt/vectorize.py +++ b/rpython/jit/metainterp/optimizeopt/vectorize.py @@ -107,10 +107,12 @@ self.cpu = metainterp_sd.cpu self.costmodel = X86_CostModel(cost_threshold, self.cpu.vector_register_size) self.appended_arg_count = 0 + self.orig_label_args = None def propagate_all_forward(self, clear=True): self.clear_newoperations() label = self.loop.operations[0] + self.orig_label_args = label.getarglist()[:] jump = self.loop.operations[-1] if jump.getopnum() not in (rop.LABEL, rop.JUMP) or \ label.getopnum() != rop.LABEL: @@ -463,7 +465,8 @@ if accum: accum.save_to_descr(op.getdescr(),i) self.loop.operations = \ - sched_data.prepend_invariant_operations(self._newoperations) + sched_data.prepend_invariant_operations(self._newoperations, + self.orig_label_args) self.clear_newoperations() def unpack_from_vector(self, op, sched_data, renamer): @@ -577,7 +580,7 @@ # tgt_op.setdescr(descr) tgt_op.rd_snapshot = op.rd_snapshot - tgt_op.setfailargs(op.getfailargs()) + tgt_op.setfailargs(op.getfailargs()[:]) class CostModel(object): _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit