Hi,
It was recently brought to our attention that the existing emmintrin.h
header, which was believed to be feature-complete for SSE2 support, is
actually missing four logical-op interfaces:
_mm_and_si128
_mm_andnot_si128
_mm_or_si128
_mm_xor_si128
This patch provides those with the obvious implementations, along with
test cases. I've bootstrapped it on powerpc64le-linux-gnu (P8, P9)
and powerpc64-linux-gnu (P7, P8) and tested it with no regressions.
Is this okay for trunk?
Although this isn't a regression, it is an oversight that leaves the
SSE2 support incomplete. Thus I'd like to ask permission to also
backport this to gcc-8-branch after a short waiting period. It's
passed regstrap on P8 and P9 LE, and P7/P8 BE testing is underway.
Is that backport okay if testing succeeds?
[BTW, I'm shepherding this patch on behalf of Steve Munroe.]
Thanks!
Bill
[gcc]
2018-07-10 Bill Schmidt
Steve Munroe
* config/rs6000/emmintrin.h (_mm_and_si128): New function.
(_mm_andnot_si128): Likewise.
(_mm_or_si128): Likewise.
(_mm_xor_si128): Likewise.
[gcc/testsuite]
2018-07-10 Bill Schmidt
Steve Munroe
* gcc.target/powerpc/sse2-pand-1.c: New file.
* gcc.target/powerpc/sse2-pandn-1.c: Likewise.
* gcc.target/powerpc/sse2-por-1.c: Likewise.
* gcc.target/powerpc/sse2-pxor-1.c: Likewise.
Index: gcc/config/rs6000/emmintrin.h
===
--- gcc/config/rs6000/emmintrin.h (revision 262235)
+++ gcc/config/rs6000/emmintrin.h (working copy)
@@ -1884,6 +1884,30 @@
}
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_and_si128 (__m128i __A, __m128i __B)
+{
+ return (__m128i)vec_and ((__v2di) __A, (__v2di) __B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_andnot_si128 (__m128i __A, __m128i __B)
+{
+ return (__m128i)vec_andc ((__v2di) __B, (__v2di) __A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_or_si128 (__m128i __A, __m128i __B)
+{
+ return (__m128i)vec_or ((__v2di) __A, (__v2di) __B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_xor_si128 (__m128i __A, __m128i __B)
+{
+ return (__m128i)vec_xor ((__v2di) __A, (__v2di) __B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_epi8 (__m128i __A, __m128i __B)
{
return (__m128i) vec_cmpeq ((__v16qi) __A, (__v16qi)__B);
@@ -2333,3 +2357,4 @@
}
#endif /* EMMINTRIN_H_ */
+
Index: gcc/testsuite/gcc.target/powerpc/sse2-pand-1.c
===
--- gcc/testsuite/gcc.target/powerpc/sse2-pand-1.c (nonexistent)
+++ gcc/testsuite/gcc.target/powerpc/sse2-pand-1.c (working copy)
@@ -0,0 +1,41 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_pand_1
+#endif
+
+#include <emmintrin.h>
+
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i s1, __m128i s2)
+{
+ return _mm_and_si128 (s1, s2);
+}
+
+static void
+TEST (void)
+{
+ union128i_b u, s1, s2;
+ char e[16];
+ int i;
+
+ s1.x = _mm_set_epi8 (1,2,3,4,10,20,30,90,-80,-40,-100,-15,98, 25, 98,7);
+ s2.x = _mm_set_epi8 (88, 44, 33, 22, 11, 98, 76, -100, -34, -78, -39, 6, 3, 4, 5, 119);
+ u.x = test (s1.x, s2.x);
+
+ for (i = 0; i < 16; i++)
+ e[i] = s1.a[i] & s2.a[i];
+
+ if (check_union128i_b (u, e))
+abort ();
+}
Index: gcc/testsuite/gcc.target/powerpc/sse2-pandn-1.c
===
--- gcc/testsuite/gcc.target/powerpc/sse2-pandn-1.c (nonexistent)
+++ gcc/testsuite/gcc.target/powerpc/sse2-pandn-1.c (working copy)
@@ -0,0 +1,41 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_pandn_1
+#endif
+
+#include <emmintrin.h>
+
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i s1, __m128i s2)
+{
+ return _mm_andnot_si128 (s1, s2);
+}
+
+static void
+TEST (void)
+{
+ union128i_b u, s1, s2;
+ char e[16];
+ int i;
+
+ s1.x = _mm_set_epi8 (1,2,3,4,10,20,30,90,-80,-40,-100,-15,98, 25, 98,7);
+ s2.x = _mm_set_epi8 (88, 44, 33, 22, 11, 98, 76, -100, -34, -78, -39, 6, 3, 4, 5, 119);
+ u.x = test (s1.x, s2.x);
+
+ for (i = 0; i < 16; i++)
+ e[i] = (~s1.a[i]) & s2.a[i];
+
+ if (check_union128i_b (u, e))
+abort ();
+}
Index: