Author: Richard Plangger <r...@pasra.at>
Branch: vecopt
Changeset: r78320:835955fe1216
Date: 2015-06-26 10:22 +0200
http://bitbucket.org/pypy/pypy/changeset/835955fe1216/

Log: splitting must be done differently

diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py b/rpython/jit/metainterp/optimizeopt/schedule.py
--- a/rpython/jit/metainterp/optimizeopt/schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/schedule.py
@@ -369,7 +369,7 @@
             vbox = self._pack(vbox, packed, args, packable)
             self.update_input_output(self.pack)
             box_pos = 0
-        elif packed > packable and box_pos != 0:
+        elif packed > packable:
             # box_pos == 0 then it is already at the right place
             # the argument has more items than the operation is able to process!
             args = [op.getoperation().getarg(argidx) for op in ops]
@@ -377,6 +377,7 @@
             self.update_input_output(self.pack)
             box_pos = 0
         elif off != 0 and box_pos != 0:
+            import py; py.test.set_trace()
             # The original box is at a position != 0 but it
             # is required to be at position 0. Unpack it!
             args = [op.getoperation().getarg(argidx) for op in ops]
@@ -542,11 +543,11 @@
         return self.result_ptype

     def split_pack(self, pack, vec_reg_size):
-        op0 = pack.operations[0].getoperation()
-        _, vbox = self.sched_data.getvector_of_box(op0.getarg(0))
-        if vbox.getcount() * self.to_size > vec_reg_size:
-            return vec_reg_size // self.to_size
-        return vbox.getcount()
+        count = self.arg_ptypes[0].getcount()
+        bytes = pack.opcount() * self.getscalarsize()
+        if bytes > count * self.from_size:
+            return bytes // (count * self.from_size)
+        return pack.opcount()

     def new_result_vector_box(self):
         type = self.output_type.gettype()
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
@@ -1397,18 +1397,21 @@
         jump(p0, p1, p5, p6, p7, p17, p19, i68, i39, i44, i49, i51)
         """
         trace="""
-        [p0, p3, i4, i5, i6, i7]
-        guard_early_exit(descr=<rpython.jit.metainterp.compile.ResumeAtLoopHeaderDescr object at 0x7f492da84250>) [p0, p3, i4, i5]
-        f8 = raw_load(i6, i5, descr=floatarraydescr)
-        guard_not_invalidated(descr=<rpython.jit.metainterp.compile.ResumeGuardNotInvalidated object at 0x7f492da846d0>) [p0, f8, p3, i4, i5]
-        i9 = cast_float_to_int(f8)
-        i11 = int_and(i9, 255)
-        guard_true(i11, descr=<rpython.jit.metainterp.compile.ResumeGuardTrueDescr object at 0x7f492da8b790>) [p0, p3, i4, i5]
-        i13 = int_add(i4, 1)
-        i15 = int_add(i5, 8)
-        i16 = int_ge(i13, i7)
-        guard_false(i16, descr=<rpython.jit.metainterp.compile.ResumeGuardFalseDescr object at 0x7f492da93610>) [p0, i13, i15, p3, None, None]
-        jump(p0, p3, i13, i15, i6, i7)
+        [p0, p1, p9, i10, p4, i11, p3, p6, p12, i13, i14, i15, f16, i17, i18]
+        guard_early_exit(descr=<rpython.jit.metainterp.compile.ResumeAtLoopHeaderDescr object at 0x7f2327d4b390>) [p6, p4, p3, p1, p0, i14, i10, i13, i11, p9, p12]
+        i19 = raw_load(i15, i11, descr=singlefloatarraydescr)
+        guard_not_invalidated(descr=<rpython.jit.metainterp.compile.ResumeGuardNotInvalidated object at 0x7f23284786d0>) [p6, p4, p3, p1, p0, i19, i14, i10, i13, i11, p9, p12]
+        i21 = int_add(i11, 4)
+        f22 = cast_singlefloat_to_float(i19)
+        f23 = float_add(f22, f16)
+        i24 = cast_float_to_singlefloat(f23)
+        raw_store(i17, i14, i24, descr=singlefloatarraydescr)
+        i26 = int_add(i13, 1)
+        i28 = int_add(i14, 4)
+        i29 = int_ge(i26, i18)
+        guard_false(i29, descr=<rpython.jit.metainterp.compile.ResumeGuardFalseDescr object at 0x7f2327d53910>) [p6, p4, p3, p1, p0, i28, i21, i26, None, i10, None, None, p9, p12]
+        debug_merge_point(0, 0, '(numpy_call2: no get_printable_location)')
+        jump(p0, p1, p9, i10, p4, i21, p3, p6, p12, i26, i28, i15, f16, i17, i18)
         """
         opt = self.vectorize(self.parse_loop(trace))
         self.debug_print_operations(opt.loop)
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit