Re: [PATCH v2 1/2] rs6000: Add support for _mm_minpos_epu16

2021-07-09 Thread Bill Schmidt via Gcc-patches

Hi Paul,

On 6/8/21 2:11 PM, Paul A. Clarke via Gcc-patches wrote:

Add a naive implementation of the subject x86 intrinsic to
ease porting.
The "subject" line won't be part of the eventual commit, so please name the
intrinsic explicitly in the commit blurb.


2021-06-08  Paul A. Clarke  

gcc/ChangeLog:
 * config/rs6000/smmintrin.h (_mm_minpos_epu16): New.
---
  gcc/config/rs6000/smmintrin.h | 25 +++++++++++++++++++++++++
  1 file changed, 25 insertions(+)

diff --git a/gcc/config/rs6000/smmintrin.h b/gcc/config/rs6000/smmintrin.h
index bdf6eb365d88..b7de38763f2b 100644
--- a/gcc/config/rs6000/smmintrin.h
+++ b/gcc/config/rs6000/smmintrin.h
@@ -116,4 +116,29 @@ _mm_blendv_epi8 (__m128i __A, __m128i __B, __m128i __mask)
return (__m128i) vec_sel ((__v16qu) __A, (__v16qu) __B, __lmask);
  }

+/* Return horizontal packed word minimum and its index in bits [15:0]
+   and bits [18:16] respectively.  */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
Line too long, please break up.  (I realize this happens throughout this 
file already, but...)

+_mm_minpos_epu16 (__m128i __A)
+{
+  union __u
+{
+  __m128i __m;
+  __v8hu __uh;
+};
+  union __u __u = { .__m = __A }, __r = { .__m = {0} };
+  unsigned short __ridx = 0;
+  unsigned short __rmin = __u.__uh[__ridx];
+  for (unsigned long __i = __ridx + 1; __i < 8; __i++)

"__ridx + 1" can just be "1"

+{
+  if (__u.__uh[__i] < __rmin)
+{
+  __rmin = __u.__uh[__i];
+  __ridx = __i;
+}

Preceding four lines need tabs, not spaces.

+}
+  __r.__uh[0] = __rmin;
+  __r.__uh[1] = __ridx;
+  return __r.__m;
+}
  #endif


Otherwise LGTM.  I can't approve, but recommend approval with those 
things fixed.


Thanks,
Bill



Re: [ping PATCH v2 1/2] rs6000: Add support for _mm_minpos_epu16

2021-06-21 Thread Paul A. Clarke via Gcc-patches
Gentle ping.

I now realize I forgot to include a blurb about "what changed in v2".

v2:
- Slight formatting changes based on Segher's review (simplified
  condition, single line).

PC

On Tue, Jun 08, 2021 at 02:11:54PM -0500, Paul A. Clarke via Gcc-patches wrote:
> Add a naive implementation of the subject x86 intrinsic to
> ease porting.
> 
> 2021-06-08  Paul A. Clarke  
> 
> gcc/ChangeLog:
> * config/rs6000/smmintrin.h (_mm_minpos_epu16): New.
> ---
>  gcc/config/rs6000/smmintrin.h | 25 +++++++++++++++++++++++++
>  1 file changed, 25 insertions(+)
> 
> diff --git a/gcc/config/rs6000/smmintrin.h b/gcc/config/rs6000/smmintrin.h
> index bdf6eb365d88..b7de38763f2b 100644
> --- a/gcc/config/rs6000/smmintrin.h
> +++ b/gcc/config/rs6000/smmintrin.h
> @@ -116,4 +116,29 @@ _mm_blendv_epi8 (__m128i __A, __m128i __B, __m128i 
> __mask)
>return (__m128i) vec_sel ((__v16qu) __A, (__v16qu) __B, __lmask);
>  }
>  
> +/* Return horizontal packed word minimum and its index in bits [15:0]
> +   and bits [18:16] respectively.  */
> +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, 
> __artificial__))
> +_mm_minpos_epu16 (__m128i __A)
> +{
> +  union __u
> +{
> +  __m128i __m;
> +  __v8hu __uh;
> +};
> +  union __u __u = { .__m = __A }, __r = { .__m = {0} };
> +  unsigned short __ridx = 0;
> +  unsigned short __rmin = __u.__uh[__ridx];
> +  for (unsigned long __i = __ridx + 1; __i < 8; __i++)
> +{
> +  if (__u.__uh[__i] < __rmin)
> +{
> +  __rmin = __u.__uh[__i];
> +  __ridx = __i;
> +}
> +}
> +  __r.__uh[0] = __rmin;
> +  __r.__uh[1] = __ridx;
> +  return __r.__m;
> +}
>  #endif
> -- 
> 2.27.0
> 


[PATCH v2 1/2] rs6000: Add support for _mm_minpos_epu16

2021-06-08 Thread Paul A. Clarke via Gcc-patches
Add a naive implementation of the subject x86 intrinsic to
ease porting.

2021-06-08  Paul A. Clarke  

gcc/ChangeLog:
* config/rs6000/smmintrin.h (_mm_minpos_epu16): New.
---
 gcc/config/rs6000/smmintrin.h | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/gcc/config/rs6000/smmintrin.h b/gcc/config/rs6000/smmintrin.h
index bdf6eb365d88..b7de38763f2b 100644
--- a/gcc/config/rs6000/smmintrin.h
+++ b/gcc/config/rs6000/smmintrin.h
@@ -116,4 +116,29 @@ _mm_blendv_epi8 (__m128i __A, __m128i __B, __m128i __mask)
   return (__m128i) vec_sel ((__v16qu) __A, (__v16qu) __B, __lmask);
 }
 
+/* Return horizontal packed word minimum and its index in bits [15:0]
+   and bits [18:16] respectively.  */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_minpos_epu16 (__m128i __A)
+{
+  union __u
+{
+  __m128i __m;
+  __v8hu __uh;
+};
+  union __u __u = { .__m = __A }, __r = { .__m = {0} };
+  unsigned short __ridx = 0;
+  unsigned short __rmin = __u.__uh[__ridx];
+  for (unsigned long __i = __ridx + 1; __i < 8; __i++)
+{
+  if (__u.__uh[__i] < __rmin)
+{
+  __rmin = __u.__uh[__i];
+  __ridx = __i;
+}
+}
+  __r.__uh[0] = __rmin;
+  __r.__uh[1] = __ridx;
+  return __r.__m;
+}
 #endif
-- 
2.27.0