Re: [PATCH v2 1/2] rs6000: Add support for _mm_minpos_epu16
Hi Paul,

On 6/8/21 2:11 PM, Paul A. Clarke via Gcc-patches wrote:
> Add a naive implementation of the subject x86 intrinsic to ease porting.

"subject" won't be part of eventual commit, so please specify in commit blurb.

> 2021-06-08 Paul A. Clarke
>
> gcc/ChangeLog:
> * config/rs6000/smmintrin.h (_mm_minpos_epu16): New.
> ---
> gcc/config/rs6000/smmintrin.h | 25 +
> 1 file changed, 25 insertions(+)
>
> diff --git a/gcc/config/rs6000/smmintrin.h b/gcc/config/rs6000/smmintrin.h
> index bdf6eb365d88..b7de38763f2b 100644
> --- a/gcc/config/rs6000/smmintrin.h
> +++ b/gcc/config/rs6000/smmintrin.h
> @@ -116,4 +116,29 @@ _mm_blendv_epi8 (__m128i __A, __m128i __B, __m128i __mask)
> return (__m128i) vec_sel ((__v16qu) __A, (__v16qu) __B, __lmask);
> }
> +/* Return horizontal packed word minimum and its index in bits [15:0]
> + and bits [18:16] respectively. */
> +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))

Line too long, please break up. (I realize this happens throughout this file already, but...)

> +_mm_minpos_epu16 (__m128i __A)
> +{
> + union __u
> +{
> + __m128i __m;
> + __v8hu __uh;
> +};
> + union __u __u = { .__m = __A }, __r = { .__m = {0} };
> + unsigned short __ridx = 0;
> + unsigned short __rmin = __u.__uh[__ridx];
> + for (unsigned long __i = __ridx + 1; __i < 8; __i++)

"__ridx + 1" can just be "1"

> +{
> + if (__u.__uh[__i] < __rmin)
> +{
> + __rmin = __u.__uh[__i];
> + __ridx = __i;
> +}

Preceding four lines need tabs, not spaces.

> +}
> + __r.__uh[0] = __rmin;
> + __r.__uh[1] = __ridx;
> + return __r.__m;
> +}
> #endif

Otherwise LGTM. I can't approve, but recommend approval with those things fixed.

Thanks,
Bill
Re: [ping PATCH v2 1/2] rs6000: Add support for _mm_minpos_epu16
Gentle ping. I now realize I forgot to include a blurb about "what changed in v2". v2: - Slight formatting changes based on Segher's review (simplified condition, single line). PC On Tue, Jun 08, 2021 at 02:11:54PM -0500, Paul A. Clarke via Gcc-patches wrote: > Add a naive implementation of the subject x86 intrinsic to > ease porting. > > 2021-06-08 Paul A. Clarke > > gcc/ChangeLog: > * config/rs6000/smmintrin.h (_mm_minpos_epu16): New. > --- > gcc/config/rs6000/smmintrin.h | 25 + > 1 file changed, 25 insertions(+) > > diff --git a/gcc/config/rs6000/smmintrin.h b/gcc/config/rs6000/smmintrin.h > index bdf6eb365d88..b7de38763f2b 100644 > --- a/gcc/config/rs6000/smmintrin.h > +++ b/gcc/config/rs6000/smmintrin.h > @@ -116,4 +116,29 @@ _mm_blendv_epi8 (__m128i __A, __m128i __B, __m128i > __mask) >return (__m128i) vec_sel ((__v16qu) __A, (__v16qu) __B, __lmask); > } > > +/* Return horizontal packed word minimum and its index in bits [15:0] > + and bits [18:16] respectively. */ > +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, > __artificial__)) > +_mm_minpos_epu16 (__m128i __A) > +{ > + union __u > +{ > + __m128i __m; > + __v8hu __uh; > +}; > + union __u __u = { .__m = __A }, __r = { .__m = {0} }; > + unsigned short __ridx = 0; > + unsigned short __rmin = __u.__uh[__ridx]; > + for (unsigned long __i = __ridx + 1; __i < 8; __i++) > +{ > + if (__u.__uh[__i] < __rmin) > +{ > + __rmin = __u.__uh[__i]; > + __ridx = __i; > +} > +} > + __r.__uh[0] = __rmin; > + __r.__uh[1] = __ridx; > + return __r.__m; > +} > #endif > -- > 2.27.0 >
[PATCH v2 1/2] rs6000: Add support for _mm_minpos_epu16
Add a naive implementation of the subject x86 intrinsic to ease porting. 2021-06-08 Paul A. Clarke gcc/ChangeLog: * config/rs6000/smmintrin.h (_mm_minpos_epu16): New. --- gcc/config/rs6000/smmintrin.h | 25 + 1 file changed, 25 insertions(+) diff --git a/gcc/config/rs6000/smmintrin.h b/gcc/config/rs6000/smmintrin.h index bdf6eb365d88..b7de38763f2b 100644 --- a/gcc/config/rs6000/smmintrin.h +++ b/gcc/config/rs6000/smmintrin.h @@ -116,4 +116,29 @@ _mm_blendv_epi8 (__m128i __A, __m128i __B, __m128i __mask) return (__m128i) vec_sel ((__v16qu) __A, (__v16qu) __B, __lmask); } +/* Return horizontal packed word minimum and its index in bits [15:0] + and bits [18:16] respectively. */ +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_minpos_epu16 (__m128i __A) +{ + union __u +{ + __m128i __m; + __v8hu __uh; +}; + union __u __u = { .__m = __A }, __r = { .__m = {0} }; + unsigned short __ridx = 0; + unsigned short __rmin = __u.__uh[__ridx]; + for (unsigned long __i = __ridx + 1; __i < 8; __i++) +{ + if (__u.__uh[__i] < __rmin) +{ + __rmin = __u.__uh[__i]; + __ridx = __i; +} +} + __r.__uh[0] = __rmin; + __r.__uh[1] = __ridx; + return __r.__m; +} #endif -- 2.27.0