https://gcc.gnu.org/g:06129071b510e29b437d2644c16ca3505a912d31
commit r16-4547-g06129071b510e29b437d2644c16ca3505a912d31 Author: liuhongt <[email protected]> Date: Mon Oct 20 01:42:32 2025 -0700 Simplify avx512 vector integer comparison when 2 operands are known equal For comparison NEQ/LT/NLE, it's simplified to 0. For comparison LE/EQ/NLT, it's simplified to (1u << nelt) - 1 gcc/ChangeLog: PR target/122320 * config/i386/sse.md (*<avx512>_cmp<mode>3_dup_op): New define_insn_and_split. gcc/testsuite/ChangeLog: * gcc.target/i386/pr122320-mask16.c: New test. * gcc.target/i386/pr122320-mask2.c: New test. * gcc.target/i386/pr122320-mask32.c: New test. * gcc.target/i386/pr122320-mask4.c: New test. * gcc.target/i386/pr122320-mask64.c: New test. * gcc.target/i386/pr122320-mask8.c: New test. Diff: --- gcc/config/i386/sse.md | 27 +++++++++++++++++++++ gcc/testsuite/gcc.target/i386/pr122320-mask16.c | 32 +++++++++++++++++++++++++ gcc/testsuite/gcc.target/i386/pr122320-mask2.c | 32 +++++++++++++++++++++++++ gcc/testsuite/gcc.target/i386/pr122320-mask32.c | 32 +++++++++++++++++++++++++ gcc/testsuite/gcc.target/i386/pr122320-mask4.c | 32 +++++++++++++++++++++++++ gcc/testsuite/gcc.target/i386/pr122320-mask64.c | 32 +++++++++++++++++++++++++ gcc/testsuite/gcc.target/i386/pr122320-mask8.c | 32 +++++++++++++++++++++++++ 7 files changed, 219 insertions(+) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 8b28c8edb19b..4ad17f67b9dc 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -4632,6 +4632,33 @@ UNSPEC_PCMP_ITER))] "operands[4] = GEN_INT (INTVAL (operands[3]) ^ 4);") +(define_insn_and_split "*<avx512>_cmp<mode>3_dup_op" + [(set (match_operand:<avx512fmaskmode> 0 "register_operand") + (unspec:<avx512fmaskmode> + [(match_operand:VI1248_AVX512VLBW 1 "general_operand") + (match_operand:VI1248_AVX512VLBW 2 "general_operand") + (match_operand:SI 3 "<cmp_imm_predicate>")] + UNSPEC_PCMP_ITER))] + "TARGET_AVX512F && ix86_pre_reload_split () + && rtx_equal_p (operands[1], operands[2])" + "#" + "&& 1" + [(set 
(match_dup 0) (match_dup 4))] +{ + int cmp_imm = INTVAL (operands[3]); + rtx res = CONST0_RTX (<avx512fmaskmode>mode); + /* EQ/LE/NLT. */ + if (cmp_imm == 0 || cmp_imm == 2 || cmp_imm == 5) + { + int nelts = GET_MODE_NUNITS (<MODE>mode); + if (nelts >= 8) + res = CONSTM1_RTX (<avx512fmaskmode>mode); + else + res = gen_int_mode ((1u << nelts) - 1, QImode); + } + operands[4] = res; +}) + (define_insn "*<avx512>_eq<mode>3<mask_scalar_merge_name>_1" [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k,k") (unspec:<avx512fmaskmode> diff --git a/gcc/testsuite/gcc.target/i386/pr122320-mask16.c b/gcc/testsuite/gcc.target/i386/pr122320-mask16.c new file mode 100644 index 000000000000..2796d748d468 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr122320-mask16.c @@ -0,0 +1,32 @@ +/* { dg-do compile } */ +/* { dg-options "-march=x86-64-v4 -O2" } */ +/* { dg-final { scan-assembler-not "vpcmp" } } */ + +#include <immintrin.h> + +__mmask16 dumpy_eq (__m512i vx){ + return _mm512_cmp_epi32_mask (vx, vx, 0); +} + +__mmask16 dumpy_lt (__m512i vx) +{ + return _mm512_cmp_epi32_mask (vx, vx, 1); +} + +__mmask16 dumpy_le (__m512i vx){ + return _mm512_cmp_epi32_mask (vx, vx, 2); +} + +__mmask16 dumpy_ne (__m512i vx) +{ + return _mm512_cmp_epi32_mask (vx, vx, 4); +} + +__mmask16 dumpy_nlt (__m512i vx) +{ + return _mm512_cmp_epi32_mask (vx, vx, 5); +} + +__mmask16 dumpy_nle (__m512i vx){ + return _mm512_cmp_epi32_mask (vx, vx, 6); +} diff --git a/gcc/testsuite/gcc.target/i386/pr122320-mask2.c b/gcc/testsuite/gcc.target/i386/pr122320-mask2.c new file mode 100644 index 000000000000..bcbc47aef5c4 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr122320-mask2.c @@ -0,0 +1,32 @@ +/* { dg-do compile } */ +/* { dg-options "-march=x86-64-v4 -O2" } */ +/* { dg-final { scan-assembler-not "vpcmp" } } */ + +#include <immintrin.h> + +__mmask8 dumpy_eq (__m128i vx){ + return _mm_cmp_epi64_mask (vx, vx, 0); +} + +__mmask8 dumpy_lt (__m128i vx) +{ + return _mm_cmp_epi64_mask (vx, vx, 1); 
+} + +__mmask8 dumpy_le (__m128i vx){ + return _mm_cmp_epi64_mask (vx, vx, 2); +} + +__mmask8 dumpy_ne (__m128i vx) +{ + return _mm_cmp_epi64_mask (vx, vx, 4); +} + +__mmask8 dumpy_nlt (__m128i vx) +{ + return _mm_cmp_epi64_mask (vx, vx, 5); +} + +__mmask8 dumpy_nle (__m128i vx){ + return _mm_cmp_epi64_mask (vx, vx, 6); +} diff --git a/gcc/testsuite/gcc.target/i386/pr122320-mask32.c b/gcc/testsuite/gcc.target/i386/pr122320-mask32.c new file mode 100644 index 000000000000..d75c8b0dfac2 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr122320-mask32.c @@ -0,0 +1,32 @@ +/* { dg-do compile } */ +/* { dg-options "-march=x86-64-v4 -O2" } */ +/* { dg-final { scan-assembler-not "vpcmp" } } */ + +#include <immintrin.h> + +__mmask32 dumpy_eq (__m512i vx){ + return _mm512_cmp_epi16_mask (vx, vx, 0); +} + +__mmask32 dumpy_lt (__m512i vx) +{ + return _mm512_cmp_epi16_mask (vx, vx, 1); +} + +__mmask32 dumpy_le (__m512i vx){ + return _mm512_cmp_epi16_mask (vx, vx, 2); +} + +__mmask32 dumpy_ne (__m512i vx) +{ + return _mm512_cmp_epi16_mask (vx, vx, 4); +} + +__mmask32 dumpy_nlt (__m512i vx) +{ + return _mm512_cmp_epi16_mask (vx, vx, 5); +} + +__mmask32 dumpy_nle (__m512i vx){ + return _mm512_cmp_epi16_mask (vx, vx, 6); +} diff --git a/gcc/testsuite/gcc.target/i386/pr122320-mask4.c b/gcc/testsuite/gcc.target/i386/pr122320-mask4.c new file mode 100644 index 000000000000..7f2ec7d5f220 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr122320-mask4.c @@ -0,0 +1,32 @@ +/* { dg-do compile } */ +/* { dg-options "-march=x86-64-v4 -O2" } */ +/* { dg-final { scan-assembler-not "vpcmp" } } */ + +#include <immintrin.h> + +__mmask8 dumpy_eq (__m256i vx){ + return _mm256_cmp_epi64_mask (vx, vx, 0); +} + +__mmask8 dumpy_lt (__m256i vx) +{ + return _mm256_cmp_epi64_mask (vx, vx, 1); +} + +__mmask8 dumpy_le (__m256i vx){ + return _mm256_cmp_epi64_mask (vx, vx, 2); +} + +__mmask8 dumpy_ne (__m256i vx) +{ + return _mm256_cmp_epi64_mask (vx, vx, 4); +} + +__mmask8 dumpy_nlt (__m256i vx) +{ + 
return _mm256_cmp_epi64_mask (vx, vx, 5); +} + +__mmask8 dumpy_nle (__m256i vx){ + return _mm256_cmp_epi64_mask (vx, vx, 6); +} diff --git a/gcc/testsuite/gcc.target/i386/pr122320-mask64.c b/gcc/testsuite/gcc.target/i386/pr122320-mask64.c new file mode 100644 index 000000000000..6a7ce5112c38 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr122320-mask64.c @@ -0,0 +1,32 @@ +/* { dg-do compile } */ +/* { dg-options "-march=x86-64-v4 -O2" } */ +/* { dg-final { scan-assembler-not "vpcmp" } } */ + +#include <immintrin.h> + +__mmask64 dumpy_eq (__m512i vx){ + return _mm512_cmp_epi8_mask (vx, vx, 0); +} + +__mmask64 dumpy_lt (__m512i vx) +{ + return _mm512_cmp_epi8_mask (vx, vx, 1); +} + +__mmask64 dumpy_le (__m512i vx){ + return _mm512_cmp_epi8_mask (vx, vx, 2); +} + +__mmask64 dumpy_ne (__m512i vx) +{ + return _mm512_cmp_epi8_mask (vx, vx, 4); +} + +__mmask64 dumpy_nlt (__m512i vx) +{ + return _mm512_cmp_epi8_mask (vx, vx, 5); +} + +__mmask64 dumpy_nle (__m512i vx){ + return _mm512_cmp_epi8_mask (vx, vx, 6); +} diff --git a/gcc/testsuite/gcc.target/i386/pr122320-mask8.c b/gcc/testsuite/gcc.target/i386/pr122320-mask8.c new file mode 100644 index 000000000000..e724a68e7eb6 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr122320-mask8.c @@ -0,0 +1,32 @@ +/* { dg-do compile } */ +/* { dg-options "-march=x86-64-v4 -O2" } */ +/* { dg-final { scan-assembler-not "vpcmp" } } */ + +#include <immintrin.h> + +__mmask8 dumpy_eq (__m512i vx){ + return _mm512_cmp_epi64_mask (vx, vx, 0); +} + +__mmask8 dumpy_lt (__m512i vx) +{ + return _mm512_cmp_epi64_mask (vx, vx, 1); +} + +__mmask8 dumpy_le (__m512i vx){ + return _mm512_cmp_epi64_mask (vx, vx, 2); +} + +__mmask8 dumpy_ne (__m512i vx) +{ + return _mm512_cmp_epi64_mask (vx, vx, 4); +} + +__mmask8 dumpy_nlt (__m512i vx) +{ + return _mm512_cmp_epi64_mask (vx, vx, 5); +} + +__mmask8 dumpy_nle (__m512i vx){ + return _mm512_cmp_epi64_mask (vx, vx, 6); +}
