Author: Richard Plangger <r...@pasra.at> Branch: vecopt Changeset: r78701:9c7a794d8c78 Date: 2015-07-29 09:31 +0200 http://bitbucket.org/pypy/pypy/changeset/9c7a794d8c78/
Log: moved vec_guard_false out of the assembler into the vector_ext file, adapted test_micronumpy to use int_is_true instead of int_and(X, 255) (which is wrong), refactored blend unused slots (out of _guard_true/false helper for vector arguments) diff --git a/pypy/module/micronumpy/test/test_zjit.py b/pypy/module/micronumpy/test/test_zjit.py --- a/pypy/module/micronumpy/test/test_zjit.py +++ b/pypy/module/micronumpy/test/test_zjit.py @@ -580,13 +580,13 @@ def define_float_any(): return """ - a = [0,0,0,0,0,0,0,1,0,0,0] + a = [0,0,0,0,0,0,0,0.9,0,0,0] any(a) """ def define_float32_any(): return """ - a = astype([0,0,0,0,0,0,0,1,0,0,0], float32) + a = astype([0,0,0,0,0,0,0,0.1,0,0,0], float32) any(a) """ diff --git a/pypy/module/pypyjit/test_pypy_c/test_micronumpy.py b/pypy/module/pypyjit/test_pypy_c/test_micronumpy.py --- a/pypy/module/pypyjit/test_pypy_c/test_micronumpy.py +++ b/pypy/module/pypyjit/test_pypy_c/test_micronumpy.py @@ -113,7 +113,7 @@ guard_true(i11, descr=...) guard_not_invalidated(descr=...) i12 = cast_float_to_int(f10) - i14 = int_and(i12, 255) + i14 = int_is_true(i12) guard_true(i14, descr=...) 
i15 = getfield_gc_pure(p1, descr=<FieldU pypy.module.micronumpy.boxes.W_BoolBox.inst_value \d+>) i16 = int_is_true(i15) diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py --- a/rpython/jit/backend/x86/assembler.py +++ b/rpython/jit/backend/x86/assembler.py @@ -1741,24 +1741,6 @@ self.mc.IMUL(arglocs[0], arglocs[1]) return self._gen_guard_overflow(guard_op, guard_token) - def _guard_vector_false(self, guard_op, loc): - arg = guard_op.getarg(0) - assert isinstance(arg, BoxVector) - # - # if the vector is not fully packed blend 1s - if not arg.fully_packed(self.cpu.vector_register_size): - temp = X86_64_XMM_SCRATCH_REG - self.mc.PXOR(temp, temp) - select = 0 - bits_used = (arg.item_count * arg.item_size * 8) - index = bits_used // 16 - while index < 8: - select |= (1 << index) - index += 1 - self.mc.PBLENDW_xxi(loc.value, temp.value, select) - - self.mc.PTEST(loc, loc) - def genop_guard_guard_false(self, ign_1, guard_op, guard_token, locs, ign_2): loc = locs[0] if isinstance(loc, RegLoc): diff --git a/rpython/jit/backend/x86/vector_ext.py b/rpython/jit/backend/x86/vector_ext.py --- a/rpython/jit/backend/x86/vector_ext.py +++ b/rpython/jit/backend/x86/vector_ext.py @@ -34,6 +34,15 @@ class VectorAssemblerMixin(object): _mixin_ = True + def _blend_unused_slots(self, loc, arg, temp): + select = 0 + bits_used = (arg.item_count * arg.item_size * 8) + index = bits_used // 16 + while index < 8: + select |= (1 << index) + index += 1 + self.mc.PBLENDW_xxi(loc.value, temp.value, select) + def _guard_vector_true(self, guard_op, loc, zero=False): arg = guard_op.getarg(0) assert isinstance(arg, BoxVector) @@ -44,13 +53,7 @@ # if the vector is not fully packed blend 1s if not arg.fully_packed(self.cpu.vector_register_size): self.mc.PCMPEQQ(temp, temp) # fill with ones - select = 0 - bits_used = (arg.item_count * arg.item_size * 8) - index = bits_used // 16 - while index < 8: - select |= (1 << index) - index += 1 - self.mc.PBLENDW_xxi(loc.value, 
temp.value, select) + self._blend_unused_slots(loc, arg, temp) # reset to zeros self.mc.PXOR(temp, temp) @@ -61,8 +64,17 @@ # test if all slots are zero self.mc.PTEST(loc, temp) - # vector operations - # ________________________________________ + def _guard_vector_false(self, guard_op, loc): + arg = guard_op.getarg(0) + assert isinstance(arg, BoxVector) + # + # if the vector is not fully packed blend 1s + if not arg.fully_packed(self.cpu.vector_register_size): + temp = X86_64_XMM_SCRATCH_REG + self.mc.PXOR(temp, temp) + self._blend_unused_slots(loc, arg, temp) + # + self.mc.PTEST(loc, loc) def _accum_update_at_exit(self, fail_locs, fail_args, faildescr, regalloc): """ If accumulation is done in this loop, at the guard exit @@ -182,12 +194,12 @@ self.mc.PCMPEQ(loc, temp, sizeloc.value) def genop_guard_vec_int_is_true(self, op, guard_op, guard_token, arglocs, resloc): - self._guard_vector_true(op, arglocs[0]) guard_opnum = guard_op.getopnum() if guard_opnum == rop.GUARD_TRUE: - self.implement_guard(guard_token, 'NZ') + self._guard_vector_true(op, arglocs[0]) else: - self.implement_guard(guard_token, 'Z') + self._guard_vector_false(op, arglocs[0]) + self.implement_guard(guard_token, 'NZ') def genop_vec_int_mul(self, op, arglocs, resloc): loc0, loc1, itemsize_loc = arglocs _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit