Author: Richard Plangger <[email protected]>
Branch: vecopt
Changeset: r78701:9c7a794d8c78
Date: 2015-07-29 09:31 +0200
http://bitbucket.org/pypy/pypy/changeset/9c7a794d8c78/
Log: moved _guard_vector_false out of the assembler into the vector_ext file;
adapted test_micronumpy to expect int_is_true instead of int_and(X,
255), which is wrong; refactored the blending of unused slots out of
the _guard_vector_true/false helpers into _blend_unused_slots
diff --git a/pypy/module/micronumpy/test/test_zjit.py
b/pypy/module/micronumpy/test/test_zjit.py
--- a/pypy/module/micronumpy/test/test_zjit.py
+++ b/pypy/module/micronumpy/test/test_zjit.py
@@ -580,13 +580,13 @@
def define_float_any():
return """
- a = [0,0,0,0,0,0,0,1,0,0,0]
+ a = [0,0,0,0,0,0,0,0.9,0,0,0]
any(a)
"""
def define_float32_any():
return """
- a = astype([0,0,0,0,0,0,0,1,0,0,0], float32)
+ a = astype([0,0,0,0,0,0,0,0.1,0,0,0], float32)
any(a)
"""
diff --git a/pypy/module/pypyjit/test_pypy_c/test_micronumpy.py
b/pypy/module/pypyjit/test_pypy_c/test_micronumpy.py
--- a/pypy/module/pypyjit/test_pypy_c/test_micronumpy.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_micronumpy.py
@@ -113,7 +113,7 @@
guard_true(i11, descr=...)
guard_not_invalidated(descr=...)
i12 = cast_float_to_int(f10)
- i14 = int_and(i12, 255)
+ i14 = int_is_true(i12)
guard_true(i14, descr=...)
i15 = getfield_gc_pure(p1, descr=<FieldU
pypy.module.micronumpy.boxes.W_BoolBox.inst_value \d+>)
i16 = int_is_true(i15)
diff --git a/rpython/jit/backend/x86/assembler.py
b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -1741,24 +1741,6 @@
self.mc.IMUL(arglocs[0], arglocs[1])
return self._gen_guard_overflow(guard_op, guard_token)
- def _guard_vector_false(self, guard_op, loc):
- arg = guard_op.getarg(0)
- assert isinstance(arg, BoxVector)
- #
- # if the vector is not fully packed blend 1s
- if not arg.fully_packed(self.cpu.vector_register_size):
- temp = X86_64_XMM_SCRATCH_REG
- self.mc.PXOR(temp, temp)
- select = 0
- bits_used = (arg.item_count * arg.item_size * 8)
- index = bits_used // 16
- while index < 8:
- select |= (1 << index)
- index += 1
- self.mc.PBLENDW_xxi(loc.value, temp.value, select)
-
- self.mc.PTEST(loc, loc)
-
def genop_guard_guard_false(self, ign_1, guard_op, guard_token, locs,
ign_2):
loc = locs[0]
if isinstance(loc, RegLoc):
diff --git a/rpython/jit/backend/x86/vector_ext.py
b/rpython/jit/backend/x86/vector_ext.py
--- a/rpython/jit/backend/x86/vector_ext.py
+++ b/rpython/jit/backend/x86/vector_ext.py
@@ -34,6 +34,15 @@
class VectorAssemblerMixin(object):
_mixin_ = True
+ def _blend_unused_slots(self, loc, arg, temp):
+ select = 0
+ bits_used = (arg.item_count * arg.item_size * 8)
+ index = bits_used // 16
+ while index < 8:
+ select |= (1 << index)
+ index += 1
+ self.mc.PBLENDW_xxi(loc.value, temp.value, select)
+
def _guard_vector_true(self, guard_op, loc, zero=False):
arg = guard_op.getarg(0)
assert isinstance(arg, BoxVector)
@@ -44,13 +53,7 @@
# if the vector is not fully packed blend 1s
if not arg.fully_packed(self.cpu.vector_register_size):
self.mc.PCMPEQQ(temp, temp) # fill with ones
- select = 0
- bits_used = (arg.item_count * arg.item_size * 8)
- index = bits_used // 16
- while index < 8:
- select |= (1 << index)
- index += 1
- self.mc.PBLENDW_xxi(loc.value, temp.value, select)
+ self._blend_unused_slots(loc, arg, temp)
# reset to zeros
self.mc.PXOR(temp, temp)
@@ -61,8 +64,17 @@
# test if all slots are zero
self.mc.PTEST(loc, temp)
- # vector operations
- # ________________________________________
+ def _guard_vector_false(self, guard_op, loc):
+ arg = guard_op.getarg(0)
+ assert isinstance(arg, BoxVector)
+ #
+ # if the vector is not fully packed blend 1s
+ if not arg.fully_packed(self.cpu.vector_register_size):
+ temp = X86_64_XMM_SCRATCH_REG
+ self.mc.PXOR(temp, temp)
+ self._blend_unused_slots(loc, arg, temp)
+ #
+ self.mc.PTEST(loc, loc)
def _accum_update_at_exit(self, fail_locs, fail_args, faildescr, regalloc):
""" If accumulation is done in this loop, at the guard exit
@@ -182,12 +194,12 @@
self.mc.PCMPEQ(loc, temp, sizeloc.value)
def genop_guard_vec_int_is_true(self, op, guard_op, guard_token, arglocs,
resloc):
- self._guard_vector_true(op, arglocs[0])
guard_opnum = guard_op.getopnum()
if guard_opnum == rop.GUARD_TRUE:
- self.implement_guard(guard_token, 'NZ')
+ self._guard_vector_true(op, arglocs[0])
else:
- self.implement_guard(guard_token, 'Z')
+ self._guard_vector_false(op, arglocs[0])
+ self.implement_guard(guard_token, 'NZ')
def genop_vec_int_mul(self, op, arglocs, resloc):
loc0, loc1, itemsize_loc = arglocs
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit