When it's the memory operand which is to be inverted, using VPANDN* requires a further load instruction. The same can be achieved by a single VPTERNLOG*. Add two new alternatives (for plain memory and embedded broadcast), adjusting the predicate for the first operand accordingly.
Two pre-existing testcases actually end up being affected (improved) by the change, which is reflected in updated expectations there. gcc/ PR target/93768 * config/i386/sse.md (*andnot<mode>3): Add new alternatives for memory form operand 1. gcc/testsuite/ PR target/93768 * gcc.target/i386/avx512f-andn-di-zmm-2.c: New test. * gcc.target/i386/avx512f-andn-si-zmm-2.c: Adjust expecations towards generated code. * gcc.target/i386/pr100711-3.c: Adjust expectations for 32-bit code. --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -17210,11 +17210,13 @@ "TARGET_AVX512F") (define_insn "*andnot<mode>3" - [(set (match_operand:VI 0 "register_operand" "=x,x,v") + [(set (match_operand:VI 0 "register_operand" "=x,x,v,v,v") (and:VI - (not:VI (match_operand:VI 1 "vector_operand" "0,x,v")) - (match_operand:VI 2 "bcst_vector_operand" "xBm,xm,vmBr")))] - "TARGET_SSE" + (not:VI (match_operand:VI 1 "bcst_vector_operand" "0,x,v,m,Br")) + (match_operand:VI 2 "bcst_vector_operand" "xBm,xm,vmBr,v,v")))] + "TARGET_SSE + && (register_operand (operands[1], <MODE>mode) + || register_operand (operands[2], <MODE>mode))" { char buf[64]; const char *ops; @@ -17281,6 +17283,15 @@ case 2: ops = "v%s%s\t{%%2, %%1, %%0|%%0, %%1, %%2}"; break; + case 3: + case 4: + tmp = "pternlog"; + ssesuffix = "<ternlogsuffix>"; + if (which_alternative != 4 || TARGET_AVX512VL) + ops = "v%s%s\t{$0x44, %%1, %%2, %%0|%%0, %%2, %%1, $0x44}"; + else + ops = "v%s%s\t{$0x44, %%g1, %%g2, %%g0|%%g0, %%g2, %%g1, $0x44}"; + break; default: gcc_unreachable (); } @@ -17289,7 +17300,7 @@ output_asm_insn (buf, operands); return ""; } - [(set_attr "isa" "noavx,avx,avx") + [(set_attr "isa" "noavx,avx,avx,*,*") (set_attr "type" "sselog") (set (attr "prefix_data16") (if_then_else @@ -17297,9 +17308,12 @@ (eq_attr "mode" "TI")) (const_string "1") (const_string "*"))) - (set_attr "prefix" "orig,vex,evex") + (set_attr "prefix" "orig,vex,evex,evex,evex") (set (attr "mode") - (cond [(match_test "TARGET_AVX2") + (cond [(and (eq_attr "alternative" "3,4") + (match_test "<MODE_SIZE> < 64 && !TARGET_AVX512VL")) + (const_string "XI") + (match_test "TARGET_AVX2") (const_string "<sseinsnmode>") (match_test "TARGET_AVX") (if_then_else @@ -17310,7 +17324,15 @@ (match_test "optimize_function_for_size_p (cfun)")) (const_string "V4SF") ] - (const_string "<sseinsnmode>")))]) + (const_string "<sseinsnmode>"))) + (set (attr "enabled") + (cond [(eq_attr "alternative" "3") + (symbol_ref "<MODE_SIZE> == 64 || TARGET_AVX512VL") + (eq_attr "alternative" "4") + (symbol_ref "<MODE_SIZE> == 64 || TARGET_AVX512VL + || (TARGET_AVX512F && !TARGET_PREFER_AVX256)") + ] + (const_string "*")))]) ;; PR target/100711: Split notl; vpbroadcastd; vpand as vpbroadcastd; vpandn (define_split --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-andn-di-zmm-2.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512f -mno-avx512vl -mprefer-vector-width=512 -O2" } */ +/* { dg-final { scan-assembler-times "vpternlogq\[ \\t\]+\\\$0x44, \\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */ +/* { dg-final { scan-assembler-not "vpbroadcast" } } */ + +#define type __m512i +#define vec 512 +#define op andnot +#define suffix epi64 +#define SCALAR long long + +#include "avx512-binop-2.h" --- a/gcc/testsuite/gcc.target/i386/avx512f-andn-si-zmm-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-andn-si-zmm-2.c @@ -1,7 +1,7 @@ /* { dg-do compile } */ /* { dg-options "-mavx512f -O2" } */ -/* { dg-final { scan-assembler-times "vpbroadcastd\[^\n\]*%zmm\[0-9\]+" 1 } } */ -/* { dg-final { scan-assembler-times "vpandnd\[^\n\]*%zmm\[0-9\]+" 1 } } */ +/* { dg-final { scan-assembler-times "vpternlogd\[ \\t\]+\\\$0x44, \\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */ +/* { dg-final { scan-assembler-not "vpbroadcast" } } */ #define type __m512i #define vec 512 --- a/gcc/testsuite/gcc.target/i386/pr100711-3.c +++ b/gcc/testsuite/gcc.target/i386/pr100711-3.c @@ -37,4 +37,6 @@ v8di foo_v8di (long long a, v8di b) return (__extension__ (v8di) {~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a}) & b; } -/* { dg-final { scan-assembler-times "vpandn" 4 } } */ +/* { dg-final { scan-assembler-times "vpandn" 4 { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler-times "vpandn" 2 { target { ia32 } } } } */ +/* { dg-final { scan-assembler-times "vpternlog\[dq\]\[ \\t\]+\\\$0x44" 2 { target { ia32 } } } } */