Author: Richard Plangger <r...@pasra.at>
Branch: vecopt
Changeset: r78332:ccca4d45dd30
Date: 2015-06-27 19:24 +0200
http://bitbucket.org/pypy/pypy/changeset/ccca4d45dd30/

Log:    fixed guard_false for packed arguments; the last test (set_slice)
        is still not working

diff --git a/pypy/module/micronumpy/test/test_zjit.py b/pypy/module/micronumpy/test/test_zjit.py
--- a/pypy/module/micronumpy/test/test_zjit.py
+++ b/pypy/module/micronumpy/test/test_zjit.py
@@ -570,12 +570,12 @@
     def test_any_float(self):
         result = self.run("float_any")
         assert int(result) == 1
-        self.check_vectorized(2, 2)
+        self.check_vectorized(1, 1)
 
     def test_any_float32(self):
         result = self.run("float32_any")
         assert int(result) == 1
-        self.check_vectorized(1, 1)
+        self.check_vectorized(2, 2)
 
     def test_any(self):
         result = self.run("any")
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -1644,7 +1644,7 @@
             self.mc.MOVD32_xr(resloc.value, eax.value)
             self.mc.PUNPCKLDQ_xx(resloc.value, loc1.value)
 
-    def _guard_vector_arg(self, guard_op, loc, zero=False):
+    def _guard_vector_true(self, guard_op, loc, zero=False):
         arg = guard_op.getarg(0)
         assert isinstance(arg, BoxVector)
         size = arg.item_size
@@ -1653,8 +1653,7 @@
         self.mc.PXOR(temp, temp)
         # if the vector is not fully packed blend 1s
         if not arg.fully_packed(self.cpu.vector_register_size):
-            if not zero:
-                self.mc.PCMPEQQ(temp, temp) # fill with ones
+            self.mc.PCMPEQQ(temp, temp) # fill with ones
             select = 0
             bits_used = (arg.item_count * arg.item_size * 8)
             index = bits_used // 16
@@ -1663,8 +1662,7 @@
                 index += 1
             self.mc.PBLENDW_xxi(loc.value, temp.value, select)
             # reset to zeros
-            if not zero:
-                self.mc.PXOR(temp, temp)
+            self.mc.PXOR(temp, temp)
 
         self.mc.PCMPEQ(size, loc, temp)
         self.mc.PCMPEQQ(temp, temp)
@@ -1673,7 +1671,7 @@
     def genop_guard_guard_true(self, ign_1, guard_op, guard_token, locs, ign_2):
         loc = locs[0]
         if loc.is_xmm:
-            self._guard_vector_arg(guard_op, loc, zero=False)
+            self._guard_vector_true(guard_op, loc)
             self.implement_guard(guard_token, 'NZ')
         else:
             self.mc.TEST(loc, loc)
@@ -1752,11 +1750,29 @@
         self.mc.IMUL(arglocs[0], arglocs[1])
         return self._gen_guard_overflow(guard_op, guard_token)
 
+    def _guard_vector_false(self, guard_op, loc):
+        arg = guard_op.getarg(0)
+        assert isinstance(arg, BoxVector)
+        #
+        # if the vector is not fully packed, blend 0s into the unused words
+        if not arg.fully_packed(self.cpu.vector_register_size):
+            temp = X86_64_XMM_SCRATCH_REG
+            self.mc.PXOR(temp, temp)
+            select = 0
+            bits_used = (arg.item_count * arg.item_size * 8)
+            index = bits_used // 16
+            while index < 8:
+                select |= (1 << index)
+                index += 1
+            self.mc.PBLENDW_xxi(loc.value, temp.value, select)
+
+        self.mc.PTEST(loc, loc)
+
     def genop_guard_guard_false(self, ign_1, guard_op, guard_token, locs, ign_2):
         loc = locs[0]
         if loc.is_xmm:
-            self._guard_vector_arg(guard_op, loc, zero=True)
-            self.implement_guard(guard_token, 'Z')
+            self._guard_vector_false(guard_op, loc)
+            self.implement_guard(guard_token, 'NZ')
         else:
             self.mc.TEST(loc, loc)
             self.implement_guard(guard_token, 'NZ')
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
@@ -1375,6 +1375,7 @@
         guard_false(i36, descr=<rpython.jit.metainterp.compile.ResumeGuardFalseDescr object at 0x7f09b34b7c10>) [p7, p6, p3, p0, i35, i24, i33, i27, None, None, i16, p9, p15, None, i10, p12, None]
         jump(p0, p9, i10, p3, i24, p12, i33, p6, i35, p7, p15, i16, i27, i18, i19, i20, i21)
         """
+        # schedule 885 -> ptype is None for raw_load?
         opt = self.vectorize(self.parse_loop(trace))
         self.debug_print_operations(opt.loop)
 
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to