[PATCH, rs6000] (PR84302) Fix _mm_slli_epi{32,64} for shift values 16 through 31 and negative

2018-04-12 Thread Paul Clarke
The powerpc versions of _mm_slli_epi32 and __mm_slli_epi64 in emmintrin.h
do not properly handle shift values between 16 and 31, inclusive.
These were setting up the shift with vec_splat_s32, which only accepts
*5 bit signed* shift values, or a range of -16 to 15.  Values above 15
produced an error:

  error: argument 1 must be a 5-bit signed literal

Fix is to effectively reduce the range for which vec_splat_s32 is used
to < 32 and use vec_splats otherwise.

Also, __mm_slli_epi{16,32,64}, when given a negative shift value,
should always return a vector of {0}.

2018-04-12  Paul A. Clarke  

gcc/config

PR target/83402
* rs6000/emmintrin.h (_mm_slli_epi{16,32,64}):
Ensure that vec_splat_s32 is only called with 0 < shift < 16.
Ensure negative shifts result in {0}.

gcc/testsuite/gcc.target/powerpc

PR target/83402
* gcc.target/powerpc/sse2-psllw-1.c: Refactor and add tests for
several positive and negative values.
* gcc.target/powerpc/sse2-pslld-1.c: Same.
* gcc.target/powerpc/sse2-psllq-1.c: Same.

Index: gcc/config/rs6000/emmintrin.h
===
--- gcc/config/rs6000/emmintrin.h   (revision 259016)
+++ gcc/config/rs6000/emmintrin.h   (working copy)
@@ -1488,7 +1488,7 @@ _mm_slli_epi16 (__m128i __A, int __B)
   __v8hu lshift;
   __v8hi result = { 0, 0, 0, 0, 0, 0, 0, 0 };
 
-  if (__B < 16)
+  if (__B > 0 && __B < 16)
 {
   if (__builtin_constant_p(__B))
  lshift = (__v8hu) vec_splat_s16(__B);
@@ -1507,12 +1507,12 @@ _mm_slli_epi32 (__m128i __A, int __B)
   __v4su lshift;
   __v4si result = { 0, 0, 0, 0 };
 
-  if (__B < 32)
+  if (__B > 0 && __B < 32)
 {
-  if (__builtin_constant_p(__B))
-   lshift = (__v4su) vec_splat_s32(__B);
+  if (__builtin_constant_p(__B) && __B < 16)
+lshift = (__v4su) vec_splat_s32(__B);
   else
-   lshift = vec_splats ((unsigned int) __B);
+lshift = vec_splats ((unsigned int) __B);
 
   result = vec_vslw ((__v4si) __A, lshift);
 }
@@ -1527,17 +1527,12 @@ _mm_slli_epi64 (__m128i __A, int __B)
   __v2du lshift;
   __v2di result = { 0, 0 };
 
-  if (__B < 64)
+  if (__B > 0 && __B < 64)
 {
-  if (__builtin_constant_p(__B))
-   {
- if (__B < 32)
- lshift = (__v2du) vec_splat_s32(__B);
-   else
- lshift = (__v2du) vec_splats((unsigned long long)__B);
-   }
+  if (__builtin_constant_p(__B) && __B < 16)
+   lshift = (__v2du) vec_splat_s32(__B);
   else
- lshift = (__v2du) vec_splats ((unsigned int) __B);
+   lshift = (__v2du) vec_splats ((unsigned int) __B);
 
   result = vec_vsld ((__v2di) __A, lshift);
 }
Index: gcc/testsuite/gcc.target/powerpc/sse2-pslld-1.c
===
--- gcc/testsuite/gcc.target/powerpc/sse2-pslld-1.c (revision 259016)
+++ gcc/testsuite/gcc.target/powerpc/sse2-pslld-1.c (working copy)
@@ -13,32 +13,50 @@
 #define TEST sse2_test_pslld_1
 #endif
 
-#define N 0xf
-
 #include 
 
-static __m128i
-__attribute__((noinline, unused))
-test (__m128i s1)
-{
-  return _mm_slli_epi32 (s1, N); 
-}
+#define TEST_FUNC(id, N) \
+  static __m128i \
+  __attribute__((noinline, unused)) \
+  test##id (__m128i s1) \
+  { \
+return _mm_slli_epi32 (s1, N);  \
+  }
 
+TEST_FUNC(0, 0)
+TEST_FUNC(15, 15)
+TEST_FUNC(16, 16)
+TEST_FUNC(31, 31)
+TEST_FUNC(neg1, -1)
+TEST_FUNC(neg16, -16)
+TEST_FUNC(neg32, -32)
+TEST_FUNC(neg64, -64)
+TEST_FUNC(neg128, -128)
+
+#define TEST_CODE(id, N) \
+  { \
+int e[4] = {0}; \
+union128i_d u, s; \
+int i; \
+s.x = _mm_set_epi32 (1, -2, 3, 4); \
+u.x = test##id (s.x); \
+if (N > 0 && N < 32) \
+  for (i = 0; i < 4; i++) \
+e[i] = s.a[i] << (N * (N > 0)); \
+if (check_union128i_d (u, e)) \
+  abort (); \
+  }
+
 static void
 TEST (void)
 {
-  union128i_d u, s;
-  int e[4] = {0};
-  int i;
- 
-  s.x = _mm_set_epi32 (1, -2, 3, 4);
-
-  u.x = test (s.x);
-
-  if (N < 32)
-for (i = 0; i < 4; i++)
-  e[i] = s.a[i] << N; 
-
-  if (check_union128i_d (u, e))
-abort (); 
+  TEST_CODE(0, 0);
+  TEST_CODE(15, 15);
+  TEST_CODE(16, 16);
+  TEST_CODE(31, 31);
+  TEST_CODE(neg1, -1);
+  TEST_CODE(neg16, -16);
+  TEST_CODE(neg32, -32);
+  TEST_CODE(neg64, -64);
+  TEST_CODE(neg128, -128);
 }
Index: gcc/testsuite/gcc.target/powerpc/sse2-psllq-1.c
===
--- gcc/testsuite/gcc.target/powerpc/sse2-psllq-1.c (revision 259016)
+++ gcc/testsuite/gcc.target/powerpc/sse2-psllq-1.c (working copy)
@@ -13,36 +13,56 @@
 #define TEST sse2_test_psllq_1
 #endif
 
-#define N 60
-
 #include 
 
 #ifdef _ARCH_PWR8
-static __m128i
-__attribute__((noinline, unused))
-test (__m128i s1)
-{
-  return _mm_slli_epi64 (s1, N); 
-}
+#define TEST_FUNC(id, N) \
+  static __m128i \
+  __attribute__((noinline, unused)) \
+  test##id (

Re: [PATCH, rs6000] (PR84302) Fix _mm_slli_epi{32,64} for shift values 16 through 31 and negative

2018-04-13 Thread Segher Boessenkool
Hi!

On Thu, Apr 12, 2018 at 07:07:21PM -0500, Paul Clarke wrote:
> The powerpc versions of _mm_slli_epi32 and __mm_slli_epi64 in emmintrin.h
> do not properly handle shift values between 16 and 31, inclusive.
> These were setting up the shift with vec_splat_s32, which only accepts
> *5 bit signed* shift values, or a range of -16 to 15.  Values above 15
> produced an error:
> 
>   error: argument 1 must be a 5-bit signed literal
> 
> Fix is to effectively reduce the range for which vec_splat_s32 is used
> to < 32 and use vec_splats otherwise.
> 
> Also, __mm_slli_epi{16,32,64}, when given a negative shift value,
> should always return a vector of {0}.

Yup.

> 2018-04-12  Paul A. Clarke  
> 
> gcc/config

There is no changelog there; the changelog is in gcc/.  I'll fix it up.

> --- gcc/config/rs6000/emmintrin.h (revision 259016)
> +++ gcc/config/rs6000/emmintrin.h (working copy)
> @@ -1488,7 +1488,7 @@ _mm_slli_epi16 (__m128i __A, int __B)
>__v8hu lshift;
>__v8hi result = { 0, 0, 0, 0, 0, 0, 0, 0 };
>  
> -  if (__B < 16)
> +  if (__B > 0 && __B < 16)
>  {
>if (__builtin_constant_p(__B))
> lshift = (__v8hu) vec_splat_s16(__B);
> @@ -1507,12 +1507,12 @@ _mm_slli_epi32 (__m128i __A, int __B)
>__v4su lshift;
>__v4si result = { 0, 0, 0, 0 };
>  
> -  if (__B < 32)
> +  if (__B > 0 && __B < 32)

These should >= 0 (I'll fix it).

Thanks for the patch!  I'll commit it; please get your commit access in
order if you plan any further patches.

Cheers,


Segher


Re: [PATCH, rs6000] (PR84302) Fix _mm_slli_epi{32,64} for shift values 16 through 31 and negative

2018-04-13 Thread Paul Clarke
On 04/13/2018 02:37 PM, Segher Boessenkool wrote:
> On Thu, Apr 12, 2018 at 07:07:21PM -0500, Paul Clarke wrote:
>> The powerpc versions of _mm_slli_epi32 and __mm_slli_epi64 in emmintrin.h
>> do not properly handle shift values between 16 and 31, inclusive.
>> These were setting up the shift with vec_splat_s32, which only accepts
>> *5 bit signed* shift values, or a range of -16 to 15.  Values above 15
>> produced an error:
>>
>>   error: argument 1 must be a 5-bit signed literal
>>
>> Fix is to effectively reduce the range for which vec_splat_s32 is used
>> to < 32 and use vec_splats otherwise.
>>
>> Also, __mm_slli_epi{16,32,64}, when given a negative shift value,
>> should always return a vector of {0}.
> 
> Yup.
> 
>> 2018-04-12  Paul A. Clarke  
>>
>> gcc/config
> 
> There is no changelog there; the changelog is in gcc/.  I'll fix it up.

At some point, I'll need to figure out what that means, I guess.  :-)

>> --- gcc/config/rs6000/emmintrin.h(revision 259016)
>> +++ gcc/config/rs6000/emmintrin.h(working copy)
>> @@ -1488,7 +1488,7 @@ _mm_slli_epi16 (__m128i __A, int __B)
>>__v8hu lshift;
>>__v8hi result = { 0, 0, 0, 0, 0, 0, 0, 0 };
>>  
>> -  if (__B < 16)
>> +  if (__B > 0 && __B < 16)
>>  {
>>if (__builtin_constant_p(__B))
>>lshift = (__v8hu) vec_splat_s16(__B);
>> @@ -1507,12 +1507,12 @@ _mm_slli_epi32 (__m128i __A, int __B)
>>__v4su lshift;
>>__v4si result = { 0, 0, 0, 0 };
>>  
>> -  if (__B < 32)
>> +  if (__B > 0 && __B < 32)
> 
> These should >= 0 (I'll fix it).

Oh, you're right, of course.  That also means the test cases have to change.  
Shall I submit a new patch with those changes?

> Thanks for the patch!  I'll commit it; please get your commit access in
> order if you plan any further patches.

I'll work on that.

PC