On Thu, Feb 13, 2020 at 9:47 AM Jakub Jelinek <ja...@redhat.com> wrote:
>
> Hi!
>
> As mentioned in the PR and as
> https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mask_popcnt_epi
> also documents, _mm*_popcnt_epi* intrinsics are consistent with all other
> unary AVX512* intrinsics regarding arguments, i.e. the
> _mm*_whatever has just single argument (called a in the docs, and __A in the
> GCC headers),
> _mm*_mask_whatever has 3 arguments (called src, k, a in the docs and
> _W, __U, __A in GCC headers) and
> _mm*_maskz_whatever 2 arguments (called k, a in the docs and __U, __A in GCC
> headers).  Unfortunately, whoever implemented the _mm*_popcnt_epi*
> intrinsics got it wrong for the _mm*_mask_popcnt_epi* ones, calling the
> args __A, __U, __B and not passing them in the canonical order to the
> builtins, making it API incompatible with ICC as well as clang (tested on
> godbolt's clang 7/8/9/trunk and ICC 19.0.{0,1}; older clang/ICC don't
> understand those, so it isn't that it used to be broken even in other
> compilers and got changed afterwards).
>
> The following patch fixes it, bootstrapped/regtested on x86_64-linux and
> i686-linux, ok for trunk?  Not really sure about release branches, perhaps
> with a big fat warning in gcc-{8,9}/changes.html?

OK for trunk and gcc-9 branch, so one can say that gcc version > 9.2 is OK.

What to do with the gcc-8 branch? We had the same situation in the past
and silently fixed it, so I think we should fix it on the gcc-8 branch,
too. I guess that users understand and assume by now that these
headers are in some flux as far as new ISAs are concerned, so the
latest branch release should be used when new(ish) ISAs are used.

Uros.

> 2020-02-13  Jakub Jelinek  <ja...@redhat.com>
>
>         PR target/93696
>         * config/i386/avx512bitalgintrin.h (_mm512_mask_popcnt_epi8,
>         _mm512_mask_popcnt_epi16, _mm256_mask_popcnt_epi8,
>         _mm256_mask_popcnt_epi16, _mm_mask_popcnt_epi8,
>         _mm_mask_popcnt_epi16): Rename __B argument to __A and __A to __W,
>         pass __A to the builtin followed by __W instead of __A followed by
>         __B.
>         * config/i386/avx512vpopcntdqintrin.h (_mm512_mask_popcnt_epi32,
>         _mm512_mask_popcnt_epi64): Likewise.
>         * config/i386/avx512vpopcntdqvlintrin.h (_mm_mask_popcnt_epi32,
>         _mm256_mask_popcnt_epi32, _mm_mask_popcnt_epi64,
>         _mm256_mask_popcnt_epi64): Likewise.
>
>         * gcc.target/i386/pr93696-1.c: New test.
>         * gcc.target/i386/pr93696-2.c: New test.
>         * gcc.target/i386/avx512bitalg-vpopcntw-1.c (TEST): Fix argument order
>         of _mm*_mask_popcnt_*.
>         * gcc.target/i386/avx512vpopcntdq-vpopcntq-1.c (TEST): Likewise.
>         * gcc.target/i386/avx512vpopcntdq-vpopcntd-1.c (TEST): Likewise.
>         * gcc.target/i386/avx512bitalg-vpopcntb-1.c (TEST): Likewise.
>         * gcc.target/i386/avx512bitalg-vpopcntb.c (foo): Likewise.
>         * gcc.target/i386/avx512bitalg-vpopcntbvl.c (foo): Likewise.
>         * gcc.target/i386/avx512vpopcntdq-vpopcntd.c (foo): Likewise.
>         * gcc.target/i386/avx512bitalg-vpopcntwvl.c (foo): Likewise.
>         * gcc.target/i386/avx512bitalg-vpopcntw.c (foo): Likewise.
>         * gcc.target/i386/avx512vpopcntdq-vpopcntq.c (foo): Likewise.
>
> --- gcc/config/i386/avx512bitalgintrin.h.jj     2020-02-12 11:43:57.183690204 
> +0100
> +++ gcc/config/i386/avx512bitalgintrin.h        2020-02-13 09:01:59.839598980 
> +0100
> @@ -61,10 +61,10 @@ _mm512_popcnt_epi16 (__m512i __A)
>
>  extern __inline __m512i
>  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> -_mm512_mask_popcnt_epi8 (__m512i __A, __mmask64 __U, __m512i __B)
> +_mm512_mask_popcnt_epi8 (__m512i __W, __mmask64 __U, __m512i __A)
>  {
>    return (__m512i) __builtin_ia32_vpopcountb_v64qi_mask ((__v64qi) __A,
> -                                                        (__v64qi) __B,
> +                                                        (__v64qi) __W,
>                                                          (__mmask64) __U);
>  }
>
> @@ -79,10 +79,10 @@ _mm512_maskz_popcnt_epi8 (__mmask64 __U,
>  }
>  extern __inline __m512i
>  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> -_mm512_mask_popcnt_epi16 (__m512i __A, __mmask32 __U, __m512i __B)
> +_mm512_mask_popcnt_epi16 (__m512i __W, __mmask32 __U, __m512i __A)
>  {
>    return (__m512i) __builtin_ia32_vpopcountw_v32hi_mask ((__v32hi) __A,
> -                                                       (__v32hi) __B,
> +                                                       (__v32hi) __W,
>                                                         (__mmask32) __U);
>  }
>
> @@ -127,10 +127,10 @@ _mm512_mask_bitshuffle_epi64_mask (__mma
>
>  extern __inline __m256i
>  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> -_mm256_mask_popcnt_epi8 (__m256i __A, __mmask32 __U, __m256i __B)
> +_mm256_mask_popcnt_epi8 (__m256i __W, __mmask32 __U, __m256i __A)
>  {
>    return (__m256i) __builtin_ia32_vpopcountb_v32qi_mask ((__v32qi) __A,
> -                                                        (__v32qi) __B,
> +                                                        (__v32qi) __W,
>                                                          (__mmask32) __U);
>  }
>
> @@ -222,10 +222,10 @@ _mm_popcnt_epi16 (__m128i __A)
>
>  extern __inline __m256i
>  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> -_mm256_mask_popcnt_epi16 (__m256i __A, __mmask16 __U, __m256i __B)
> +_mm256_mask_popcnt_epi16 (__m256i __W, __mmask16 __U, __m256i __A)
>  {
>    return (__m256i) __builtin_ia32_vpopcountw_v16hi_mask ((__v16hi) __A,
> -                                                       (__v16hi) __B,
> +                                                       (__v16hi) __W,
>                                                         (__mmask16) __U);
>  }
>
> @@ -241,10 +241,10 @@ _mm256_maskz_popcnt_epi16 (__mmask16 __U
>
>  extern __inline __m128i
>  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> -_mm_mask_popcnt_epi8 (__m128i __A, __mmask16 __U, __m128i __B)
> +_mm_mask_popcnt_epi8 (__m128i __W, __mmask16 __U, __m128i __A)
>  {
>    return (__m128i) __builtin_ia32_vpopcountb_v16qi_mask ((__v16qi) __A,
> -                                                        (__v16qi) __B,
> +                                                        (__v16qi) __W,
>                                                          (__mmask16) __U);
>  }
>
> @@ -259,10 +259,10 @@ _mm_maskz_popcnt_epi8 (__mmask16 __U, __
>  }
>  extern __inline __m128i
>  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> -_mm_mask_popcnt_epi16 (__m128i __A, __mmask8 __U, __m128i __B)
> +_mm_mask_popcnt_epi16 (__m128i __W, __mmask8 __U, __m128i __A)
>  {
>    return (__m128i) __builtin_ia32_vpopcountw_v8hi_mask ((__v8hi) __A,
> -                                                       (__v8hi) __B,
> +                                                       (__v8hi) __W,
>                                                         (__mmask8) __U);
>  }
>
> --- gcc/config/i386/avx512vpopcntdqintrin.h.jj  2020-02-12 11:43:57.213689757 
> +0100
> +++ gcc/config/i386/avx512vpopcntdqintrin.h     2020-02-13 09:01:59.854598755 
> +0100
> @@ -43,10 +43,10 @@ _mm512_popcnt_epi32 (__m512i __A)
>
>  extern __inline __m512i
>  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> -_mm512_mask_popcnt_epi32 (__m512i __A, __mmask16 __U, __m512i __B)
> +_mm512_mask_popcnt_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
>  {
>    return (__m512i) __builtin_ia32_vpopcountd_v16si_mask ((__v16si) __A,
> -                                                        (__v16si) __B,
> +                                                        (__v16si) __W,
>                                                          (__mmask16) __U);
>  }
>
> @@ -69,10 +69,10 @@ _mm512_popcnt_epi64 (__m512i __A)
>
>  extern __inline __m512i
>  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> -_mm512_mask_popcnt_epi64 (__m512i __A, __mmask8 __U, __m512i __B)
> +_mm512_mask_popcnt_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
>  {
>    return (__m512i) __builtin_ia32_vpopcountq_v8di_mask ((__v8di) __A,
> -                                                       (__v8di) __B,
> +                                                       (__v8di) __W,
>                                                         (__mmask8) __U);
>  }
>
> --- gcc/config/i386/avx512vpopcntdqvlintrin.h.jj        2020-02-12 
> 11:43:57.235689425 +0100
> +++ gcc/config/i386/avx512vpopcntdqvlintrin.h   2020-02-13 09:01:59.874598454 
> +0100
> @@ -43,10 +43,10 @@ _mm_popcnt_epi32 (__m128i __A)
>
>  extern __inline __m128i
>  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> -_mm_mask_popcnt_epi32 (__m128i __A, __mmask16 __U, __m128i __B)
> +_mm_mask_popcnt_epi32 (__m128i __W, __mmask16 __U, __m128i __A)
>  {
>    return (__m128i) __builtin_ia32_vpopcountd_v4si_mask ((__v4si) __A,
> -                                                        (__v4si) __B,
> +                                                        (__v4si) __W,
>                                                          (__mmask16) __U);
>  }
>
> @@ -69,10 +69,10 @@ _mm256_popcnt_epi32 (__m256i __A)
>
>  extern __inline __m256i
>  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> -_mm256_mask_popcnt_epi32 (__m256i __A, __mmask16 __U, __m256i __B)
> +_mm256_mask_popcnt_epi32 (__m256i __W, __mmask16 __U, __m256i __A)
>  {
>    return (__m256i) __builtin_ia32_vpopcountd_v8si_mask ((__v8si) __A,
> -                                                        (__v8si) __B,
> +                                                        (__v8si) __W,
>                                                          (__mmask16) __U);
>  }
>
> @@ -95,10 +95,10 @@ _mm_popcnt_epi64 (__m128i __A)
>
>  extern __inline __m128i
>  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> -_mm_mask_popcnt_epi64 (__m128i __A, __mmask8 __U, __m128i __B)
> +_mm_mask_popcnt_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
>  {
>    return (__m128i) __builtin_ia32_vpopcountq_v2di_mask ((__v2di) __A,
> -                                                       (__v2di) __B,
> +                                                       (__v2di) __W,
>                                                         (__mmask8) __U);
>  }
>
> @@ -121,10 +121,10 @@ _mm256_popcnt_epi64 (__m256i __A)
>
>  extern __inline __m256i
>  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> -_mm256_mask_popcnt_epi64 (__m256i __A, __mmask8 __U, __m256i __B)
> +_mm256_mask_popcnt_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
>  {
>    return (__m256i) __builtin_ia32_vpopcountq_v4di_mask ((__v4di) __A,
> -                                                       (__v4di) __B,
> +                                                       (__v4di) __W,
>                                                         (__mmask8) __U);
>  }
>
> @@ -144,4 +144,3 @@ _mm256_maskz_popcnt_epi64 (__mmask8 __U,
>  #endif /* __DISABLE_AVX512VPOPCNTDQVL__ */
>
>  #endif /* _AVX512VPOPCNTDQVLINTRIN_H_INCLUDED */
> -
> --- gcc/testsuite/gcc.target/i386/pr93696-1.c.jj        2020-02-13 
> 09:01:59.878598394 +0100
> +++ gcc/testsuite/gcc.target/i386/pr93696-1.c   2020-02-13 09:01:59.878598394 
> +0100
> @@ -0,0 +1,79 @@
> +/* PR target/93696 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mavx512bitalg -mavx512vpopcntdq -mavx512vl -mavx512bw 
> -masm=att" } */
> +/* { dg-final { scan-assembler-times "vpopcnt\[bwdq]\t%\[xyz]mm1, 
> %\[xyz]mm0\{%k\[0-7]\}\[^\{]" 12 } } */
> +/* { dg-final { scan-assembler-not "vmovdq\[au]\[0-9]" } } */
> +
> +#include <x86intrin.h>
> +
> +__m128i
> +f1 (__m128i x, __mmask8 m, __m128i y)
> +{
> +  return _mm_mask_popcnt_epi64 (x, m, y);
> +}
> +
> +__m128i
> +f2 (__m128i x, __mmask8 m, __m128i y)
> +{
> +  return _mm_mask_popcnt_epi32 (x, m, y);
> +}
> +
> +__m128i
> +f3 (__m128i x, __mmask8 m, __m128i y)
> +{
> +  return _mm_mask_popcnt_epi16 (x, m, y);
> +}
> +
> +__m128i
> +f4 (__m128i x, __mmask16 m, __m128i y)
> +{
> +  return _mm_mask_popcnt_epi8 (x, m, y);
> +}
> +
> +__m256i
> +f5 (__m256i x, __mmask8 m, __m256i y)
> +{
> +  return _mm256_mask_popcnt_epi64 (x, m, y);
> +}
> +
> +__m256i
> +f6 (__m256i x, __mmask8 m, __m256i y)
> +{
> +  return _mm256_mask_popcnt_epi32 (x, m, y);
> +}
> +
> +__m256i
> +f7 (__m256i x, __mmask16 m, __m256i y)
> +{
> +  return _mm256_mask_popcnt_epi16 (x, m, y);
> +}
> +
> +__m256i
> +f8 (__m256i x, __mmask32 m, __m256i y)
> +{
> +  return _mm256_mask_popcnt_epi8 (x, m, y);
> +}
> +
> +__m512i
> +f9 (__m512i x, __mmask8 m, __m512i y)
> +{
> +  return _mm512_mask_popcnt_epi64 (x, m, y);
> +}
> +
> +__m512i
> +f10 (__m512i x, __mmask16 m, __m512i y)
> +{
> +  return _mm512_mask_popcnt_epi32 (x, m, y);
> +}
> +
> +__m512i
> +f11 (__m512i x, __mmask32 m, __m512i y)
> +{
> +  return _mm512_mask_popcnt_epi16 (x, m, y);
> +}
> +
> +__m512i
> +f12 (__m512i x, __mmask64 m, __m512i y)
> +{
> +  return _mm512_mask_popcnt_epi8 (x, m, y);
> +}
> --- gcc/testsuite/gcc.target/i386/pr93696-2.c.jj        2020-02-13 
> 09:01:59.878598394 +0100
> +++ gcc/testsuite/gcc.target/i386/pr93696-2.c   2020-02-13 09:01:59.878598394 
> +0100
> @@ -0,0 +1,79 @@
> +/* PR target/93696 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mavx512bitalg -mavx512vpopcntdq -mavx512vl -mavx512bw 
> -masm=att" } */
> +/* { dg-final { scan-assembler-times "vpopcnt\[bwdq]\t%\[xyz]mm1, 
> %\[xyz]mm0\{%k\[0-7]\}\{z\}" 12 } } */
> +/* { dg-final { scan-assembler-not "vmovdq\[au]\[0-9]" } } */
> +
> +#include <x86intrin.h>
> +
> +__m128i
> +f1 (__m128i x, __mmask8 m, __m128i y)
> +{
> +  return _mm_maskz_popcnt_epi64 (m, y);
> +}
> +
> +__m128i
> +f2 (__m128i x, __mmask8 m, __m128i y)
> +{
> +  return _mm_maskz_popcnt_epi32 (m, y);
> +}
> +
> +__m128i
> +f3 (__m128i x, __mmask8 m, __m128i y)
> +{
> +  return _mm_maskz_popcnt_epi16 (m, y);
> +}
> +
> +__m128i
> +f4 (__m128i x, __mmask16 m, __m128i y)
> +{
> +  return _mm_maskz_popcnt_epi8 (m, y);
> +}
> +
> +__m256i
> +f5 (__m256i x, __mmask8 m, __m256i y)
> +{
> +  return _mm256_maskz_popcnt_epi64 (m, y);
> +}
> +
> +__m256i
> +f6 (__m256i x, __mmask8 m, __m256i y)
> +{
> +  return _mm256_maskz_popcnt_epi32 (m, y);
> +}
> +
> +__m256i
> +f7 (__m256i x, __mmask16 m, __m256i y)
> +{
> +  return _mm256_maskz_popcnt_epi16 (m, y);
> +}
> +
> +__m256i
> +f8 (__m256i x, __mmask32 m, __m256i y)
> +{
> +  return _mm256_maskz_popcnt_epi8 (m, y);
> +}
> +
> +__m512i
> +f9 (__m512i x, __mmask8 m, __m512i y)
> +{
> +  return _mm512_maskz_popcnt_epi64 (m, y);
> +}
> +
> +__m512i
> +f10 (__m512i x, __mmask16 m, __m512i y)
> +{
> +  return _mm512_maskz_popcnt_epi32 (m, y);
> +}
> +
> +__m512i
> +f11 (__m512i x, __mmask32 m, __m512i y)
> +{
> +  return _mm512_maskz_popcnt_epi16 (m, y);
> +}
> +
> +__m512i
> +f12 (__m512i x, __mmask64 m, __m512i y)
> +{
> +  return _mm512_maskz_popcnt_epi8 (m, y);
> +}
> --- gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntw-1.c.jj  2020-01-12 
> 11:54:37.885391200 +0100
> +++ gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntw-1.c     2020-02-13 
> 09:07:08.727954510 +0100
> @@ -41,7 +41,7 @@ TEST (void)
>    }
>
>    res1.x = INTRINSIC (_popcnt_epi16)       (src.x);
> -  res2.x = INTRINSIC (_mask_popcnt_epi16)  (src.x, mask, src0.x);
> +  res2.x = INTRINSIC (_mask_popcnt_epi16)  (src0.x, mask, src.x);
>    res3.x = INTRINSIC (_maskz_popcnt_epi16) (mask, src.x);
>
>    if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref))
> --- gcc/testsuite/gcc.target/i386/avx512vpopcntdq-vpopcntq-1.c.jj       
> 2020-01-12 11:54:37.936390431 +0100
> +++ gcc/testsuite/gcc.target/i386/avx512vpopcntdq-vpopcntq-1.c  2020-02-13 
> 09:09:19.622986364 +0100
> @@ -40,7 +40,7 @@ TEST (void)
>    }
>
>    res1.x = INTRINSIC (_popcnt_epi64)       (src.x);
> -  res2.x = INTRINSIC (_mask_popcnt_epi64)  (src.x, mask, src0.x);
> +  res2.x = INTRINSIC (_mask_popcnt_epi64)  (src0.x, mask, src.x);
>    res3.x = INTRINSIC (_maskz_popcnt_epi64) (mask, src.x);
>
>    if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
> --- gcc/testsuite/gcc.target/i386/avx512vpopcntdq-vpopcntd-1.c.jj       
> 2020-01-12 11:54:37.936390431 +0100
> +++ gcc/testsuite/gcc.target/i386/avx512vpopcntdq-vpopcntd-1.c  2020-02-13 
> 09:08:35.249653564 +0100
> @@ -40,7 +40,7 @@ TEST (void)
>    }
>
>    res1.x = INTRINSIC (_popcnt_epi32)       (src.x);
> -  res2.x = INTRINSIC (_mask_popcnt_epi32)  (src.x, mask, src0.x);
> +  res2.x = INTRINSIC (_mask_popcnt_epi32)  (src0.x, mask, src.x);
>    res3.x = INTRINSIC (_maskz_popcnt_epi32) (mask, src.x);
>
>    if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
> --- gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntb-1.c.jj  2020-01-12 
> 11:54:37.885391200 +0100
> +++ gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntb-1.c     2020-02-13 
> 09:05:57.218029740 +0100
> @@ -41,7 +41,7 @@ TEST (void)
>    }
>
>    res1.x = INTRINSIC (_popcnt_epi8)       (src.x);
> -  res2.x = INTRINSIC (_mask_popcnt_epi8)  (src.x, mask, src0.x);
> +  res2.x = INTRINSIC (_mask_popcnt_epi8)  (src0.x, mask, src.x);
>    res3.x = INTRINSIC (_maskz_popcnt_epi8) (mask, src.x);
>
>    if (UNION_CHECK (AVX512F_LEN, i_b) (res1, res_ref))
> --- gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntb.c.jj    2020-01-12 
> 11:54:37.885391200 +0100
> +++ gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntb.c       2020-02-13 
> 09:06:22.296652650 +0100
> @@ -13,7 +13,7 @@ int foo ()
>    __mmask16 msk;
>    __m512i c = _mm512_popcnt_epi8 (z);
>    asm volatile ("" : "+v" (c));
> -  c = _mm512_mask_popcnt_epi8 (z, msk, z1);
> +  c = _mm512_mask_popcnt_epi8 (z1, msk, z);
>    asm volatile ("" : "+v" (c));
>    c = _mm512_maskz_popcnt_epi8 (msk, z);
>    asm volatile ("" : "+v" (c));
> --- gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntbvl.c.jj  2020-01-12 
> 11:54:37.885391200 +0100
> +++ gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntbvl.c     2020-02-13 
> 09:06:50.655226263 +0100
> @@ -18,13 +18,13 @@ int foo ()
>    __mmask16 msk16;
>    __m256i c256 = _mm256_popcnt_epi8 (y);
>    asm volatile ("" : "+v" (c256));
> -  c256 = _mm256_mask_popcnt_epi8 (y, msk32, y_1);
> +  c256 = _mm256_mask_popcnt_epi8 (y_1, msk32, y);
>    asm volatile ("" : "+v" (c256));
>    c256 = _mm256_maskz_popcnt_epi8 (msk32, y);
>    asm volatile ("" : "+v" (c256));
>    __m128i c128 = _mm_popcnt_epi8 (x);
>    asm volatile ("" : "+v" (c128));
> -  c128 = _mm_mask_popcnt_epi8 (x, msk16, x_1);
> +  c128 = _mm_mask_popcnt_epi8 (x_1, msk16, x);
>    asm volatile ("" : "+v" (c128));
>    c128 = _mm_maskz_popcnt_epi8 (msk16, x);
>    asm volatile ("" : "+v" (c128));
> --- gcc/testsuite/gcc.target/i386/avx512vpopcntdq-vpopcntd.c.jj 2020-01-12 
> 11:54:37.936390431 +0100
> +++ gcc/testsuite/gcc.target/i386/avx512vpopcntdq-vpopcntd.c    2020-02-13 
> 09:09:05.665196232 +0100
> @@ -22,19 +22,19 @@ int foo ()
>    __mmask8 msk8;
>    __m128i a = _mm_popcnt_epi32 (x);
>    asm volatile ("" : "+v" (a));
> -  a = _mm_mask_popcnt_epi32 (x, msk8, x_1);
> +  a = _mm_mask_popcnt_epi32 (x_1, msk8, x);
>    asm volatile ("" : "+v" (a));
>    a = _mm_maskz_popcnt_epi32 (msk8, x);
>    asm volatile ("" : "+v" (a));
>    __m256i b = _mm256_popcnt_epi32 (y);
>    asm volatile ("" : "+v" (b));
> -  b = _mm256_mask_popcnt_epi32 (y, msk8, y_1);
> +  b = _mm256_mask_popcnt_epi32 (y_1, msk8, y);
>    asm volatile ("" : "+v" (b));
>    b = _mm256_maskz_popcnt_epi32 (msk8, y);
>    asm volatile ("" : "+v" (b));
>    __m512i c = _mm512_popcnt_epi32 (z);
>    asm volatile ("" : "+v" (c));
> -  c = _mm512_mask_popcnt_epi32 (z, msk, z_1);
> +  c = _mm512_mask_popcnt_epi32 (z_1, msk, z);
>    asm volatile ("" : "+v" (c));
>    c = _mm512_maskz_popcnt_epi32 (msk, z);
>    asm volatile ("" : "+v" (c));
> --- gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntwvl.c.jj  2020-01-12 
> 11:54:37.885391200 +0100
> +++ gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntwvl.c     2020-02-13 
> 09:07:52.621294528 +0100
> @@ -18,13 +18,13 @@ int foo ()
>    __mmask8 msk8;
>    __m256i c256 = _mm256_popcnt_epi16 (y);
>    asm volatile ("" : "+v" (c256));
> -  c256 = _mm256_mask_popcnt_epi16 (y, msk16, y_1);
> +  c256 = _mm256_mask_popcnt_epi16 (y_1, msk16, y);
>    asm volatile ("" : "+v" (c256));
>    c256 = _mm256_maskz_popcnt_epi16 (msk16, y);
>    asm volatile ("" : "+v" (c256));
>    __m128i c128 = _mm_popcnt_epi16 (x);
>    asm volatile ("" : "+v" (c128));
> -  c128 = _mm_mask_popcnt_epi16 (x, msk8, x_1);
> +  c128 = _mm_mask_popcnt_epi16 (x_1, msk8, x);
>    asm volatile ("" : "+v" (c128));
>    c128 = _mm_maskz_popcnt_epi16 (msk8, x);
>    asm volatile ("" : "+v" (c128));
> --- gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntw.c.jj    2020-01-12 
> 11:54:37.885391200 +0100
> +++ gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntw.c       2020-02-13 
> 09:07:28.140662623 +0100
> @@ -13,7 +13,7 @@ int foo ()
>    __mmask16 msk;
>    __m512i c = _mm512_popcnt_epi16 (z);
>    asm volatile ("" : "+v" (c));
> -  c = _mm512_mask_popcnt_epi16 (z, msk, z1);
> +  c = _mm512_mask_popcnt_epi16 (z1, msk, z);
>    asm volatile ("" : "+v" (c));
>    c = _mm512_maskz_popcnt_epi16 (msk, z);
>    asm volatile ("" : "+v" (c));
> --- gcc/testsuite/gcc.target/i386/avx512vpopcntdq-vpopcntq.c.jj 2020-01-12 
> 11:54:37.936390431 +0100
> +++ gcc/testsuite/gcc.target/i386/avx512vpopcntdq-vpopcntq.c    2020-02-13 
> 09:09:44.786608000 +0100
> @@ -21,19 +21,19 @@ int foo ()
>    __mmask8 msk;
>    __m128i a = _mm_popcnt_epi64 (x);
>    asm volatile ("" : "+v" (a));
> -  a = _mm_mask_popcnt_epi64 (x, msk, x_1);
> +  a = _mm_mask_popcnt_epi64 (x_1, msk, x);
>    asm volatile ("" : "+v" (a));
>    a = _mm_maskz_popcnt_epi64 (msk, x);
>    asm volatile ("" : "+v" (a));
>    __m256i b = _mm256_popcnt_epi64 (y);
>    asm volatile ("" : "+v" (b));
> -  b = _mm256_mask_popcnt_epi64 (y, msk, y_1);
> +  b = _mm256_mask_popcnt_epi64 (y_1, msk, y);
>    asm volatile ("" : "+v" (b));
>    b = _mm256_maskz_popcnt_epi64 (msk, y);
>    asm volatile ("" : "+v" (b));
>    __m512i c = _mm512_popcnt_epi64 (z);
>    asm volatile ("" : "+v" (c));
> -  c = _mm512_mask_popcnt_epi64 (z, msk, z_1);
> +  c = _mm512_mask_popcnt_epi64 (z_1, msk, z);
>    asm volatile ("" : "+v" (c));
>    c = _mm512_maskz_popcnt_epi64 (msk, z);
>    asm volatile ("" : "+v" (c));
>
>         Jakub
>

Reply via email to