gcc/ChangeLog: PR target/115517 * config/i386/sse.md (*<avx512>_cvtmask2<ssemodesuffix><mode>_not): Remove the VI12_AVX512VL variant and extend the remaining pattern from VI48_AVX512VL to VI1248_AVX512VLBW. (*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_lt_avx512): New define_insn_and_split. (*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext_lt_avx512): Ditto. (*<sse2_avx2>_pmovmskb_lt_avx512): Ditto. (*<sse2_avx2>_pmovmskb_zext_lt_avx512): Ditto. (*sse2_pmovmskb_ext_lt_avx512): Ditto. (*pmovsk_kmask_v16qi_avx512): Ditto. (*pmovsk_mask_v32qi_avx512): Ditto. (*pmovsk_mask_cmp_<mode>_avx512): Ditto. (*pmovsk_ptest_<mode>_avx512): Ditto. --- gcc/config/i386/sse.md | 232 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 209 insertions(+), 23 deletions(-)
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 92f8b74999f..5996ad99606 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -10049,24 +10049,6 @@ (define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>" [(set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) -(define_insn_and_split "*<avx512>_cvtmask2<ssemodesuffix><mode>_not" - [(set (match_operand:VI12_AVX512VL 0 "register_operand") - (vec_merge:VI12_AVX512VL - (match_operand:VI12_AVX512VL 2 "const0_operand") - (match_operand:VI12_AVX512VL 3 "vector_all_ones_operand") - (match_operand:<avx512fmaskmode> 1 "register_operand")))] - "TARGET_AVX512BW && ix86_pre_reload_split ()" - "#" - "&& 1" - [(set (match_dup 4) - (not:<avx512fmaskmode> (match_dup 1))) - (set (match_dup 0) - (vec_merge:VI12_AVX512VL - (match_dup 3) - (match_dup 2) - (match_dup 4)))] - "operands[4] = gen_reg_rtx (<avx512fmaskmode>mode);") - (define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>" [(set (match_operand:VI48_AVX512VL 0 "register_operand") (vec_merge:VI48_AVX512VL @@ -10106,10 +10088,10 @@ (define_insn_and_split "*<avx512>_cvtmask2<ssemodesuffix><mode>" (set_attr "mode" "<sseinsnmode>")]) (define_insn_and_split "*<avx512>_cvtmask2<ssemodesuffix><mode>_not" - [(set (match_operand:VI48_AVX512VL 0 "register_operand") - (vec_merge:VI48_AVX512VL - (match_operand:VI48_AVX512VL 2 "const0_operand") - (match_operand:VI48_AVX512VL 3 "vector_all_ones_operand") + [(set (match_operand:VI1248_AVX512VLBW 0 "register_operand") + (vec_merge:VI1248_AVX512VLBW + (match_operand:VI1248_AVX512VLBW 2 "const0_operand") + (match_operand:VI1248_AVX512VLBW 3 "vector_all_ones_operand") (match_operand:<avx512fmaskmode> 1 "register_operand")))] "TARGET_AVX512F && ix86_pre_reload_split ()" "#" @@ -10117,7 +10099,7 @@ (define_insn_and_split "*<avx512>_cvtmask2<ssemodesuffix><mode>_not" [(set (match_dup 4) (not:<avx512fmaskmode> (match_dup 1))) (set (match_dup 0) - (vec_merge:VI48_AVX512VL + (vec_merge:VI1248_AVX512VLBW 
(match_dup 3) (match_dup 2) (match_dup 4)))] @@ -21753,6 +21735,30 @@ (define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_lt" (set_attr "prefix" "maybe_vex") (set_attr "mode" "<MODE>")]) +(define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_lt_avx512" + [(set (match_operand:SI 0 "register_operand" "=r,jr") + (unspec:SI + [(subreg:VF_128_256 + (vec_merge:<sseintvecmode> + (match_operand:<sseintvecmode> 3 "vector_all_ones_operand") + (match_operand:<sseintvecmode> 4 "const0_operand") + (unspec:<avx512fmaskmode> + [(match_operand:<sseintvecmode> 1 "register_operand" "x,x") + (match_operand:<sseintvecmode> 2 "const0_operand") + (const_int 1)] + UNSPEC_PCMP)) 0)] + UNSPEC_MOVMSK))] + "TARGET_SSE" + "#" + "&& reload_completed" + [(set (match_dup 0) + (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))] + "operands[1] = gen_lowpart (<MODE>mode, operands[1]);" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "ssemov") + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "<MODE>")]) + (define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext_lt" [(set (match_operand:DI 0 "register_operand" "=r,jr") (any_extend:DI @@ -21772,6 +21778,31 @@ (define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext_lt" (set_attr "prefix" "maybe_vex") (set_attr "mode" "<MODE>")]) +(define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext_lt_avx512" + [(set (match_operand:DI 0 "register_operand" "=r,jr") + (any_extend:DI + (unspec:SI + [(subreg:VF_128_256 + (vec_merge:<sseintvecmode> + (match_operand:<sseintvecmode> 3 "vector_all_ones_operand") + (match_operand:<sseintvecmode> 4 "const0_operand") + (unspec:<avx512fmaskmode> + [(match_operand:<sseintvecmode> 1 "register_operand" "x,x") + (match_operand:<sseintvecmode> 2 "const0_operand") + (const_int 1)] + UNSPEC_PCMP)) 0)] + UNSPEC_MOVMSK)))] + "TARGET_64BIT && TARGET_SSE" + "#" + "&& reload_completed" + [(set (match_dup 0) + (any_extend:DI (unspec:SI [(match_dup 1)] 
UNSPEC_MOVMSK)))] + "operands[1] = gen_lowpart (<MODE>mode, operands[1]);" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "ssemov") + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "<MODE>")]) + (define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_shift" [(set (match_operand:SI 0 "register_operand" "=r,jr") (unspec:SI @@ -21961,6 +21992,34 @@ (define_insn_and_split "*<sse2_avx2>_pmovmskb_lt" (set_attr "prefix" "maybe_vex") (set_attr "mode" "SI")]) +(define_insn_and_split "*<sse2_avx2>_pmovmskb_lt_avx512" + [(set (match_operand:SI 0 "register_operand" "=r,jr") + (unspec:SI + [(vec_merge:VI1_AVX2 + (match_operand:VI1_AVX2 3 "vector_all_ones_operand") + (match_operand:VI1_AVX2 4 "const0_operand") + (unspec:<avx512fmaskmode> + [(match_operand:VI1_AVX2 1 "register_operand" "x,x") + (match_operand:VI1_AVX2 2 "const0_operand") + (const_int 1)] + UNSPEC_PCMP))] + UNSPEC_MOVMSK))] + "TARGET_SSE2" + "#" + "&& 1" + [(set (match_dup 0) + (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))] + "" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "ssemov") + (set (attr "prefix_data16") + (if_then_else + (match_test "TARGET_AVX") + (const_string "*") + (const_string "1"))) + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "SI")]) + (define_insn_and_split "*<sse2_avx2>_pmovmskb_zext_lt" [(set (match_operand:DI 0 "register_operand" "=r,jr") (zero_extend:DI @@ -21984,6 +22043,35 @@ (define_insn_and_split "*<sse2_avx2>_pmovmskb_zext_lt" (set_attr "prefix" "maybe_vex") (set_attr "mode" "SI")]) +(define_insn_and_split "*<sse2_avx2>_pmovmskb_zext_lt_avx512" + [(set (match_operand:DI 0 "register_operand" "=r,jr") + (zero_extend:DI + (unspec:SI + [(vec_merge:VI1_AVX2 + (match_operand:VI1_AVX2 3 "vector_all_ones_operand") + (match_operand:VI1_AVX2 4 "const0_operand") + (unspec:<avx512fmaskmode> + [(match_operand:VI1_AVX2 1 "register_operand" "x,x") + (match_operand:VI1_AVX2 2 "const0_operand") + (const_int 1)] + UNSPEC_PCMP))] + UNSPEC_MOVMSK)))] + "TARGET_64BIT && 
TARGET_SSE2" + "#" + "&& 1" + [(set (match_dup 0) + (zero_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))] + "" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "ssemov") + (set (attr "prefix_data16") + (if_then_else + (match_test "TARGET_AVX") + (const_string "*") + (const_string "1"))) + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "SI")]) + (define_insn_and_split "*sse2_pmovmskb_ext_lt" [(set (match_operand:DI 0 "register_operand" "=r,jr") (sign_extend:DI @@ -22007,6 +22095,63 @@ (define_insn_and_split "*sse2_pmovmskb_ext_lt" (set_attr "prefix" "maybe_vex") (set_attr "mode" "SI")]) +(define_insn_and_split "*sse2_pmovmskb_ext_lt_avx512" + [(set (match_operand:DI 0 "register_operand" "=r,jr") + (sign_extend:DI + (unspec:SI + [(vec_merge:VI1_AVX2 + (match_operand:VI1_AVX2 3 "vector_all_ones_operand") + (match_operand:VI1_AVX2 4 "const0_operand") + (unspec:<avx512fmaskmode> + [(match_operand:VI1_AVX2 1 "register_operand" "x,x") + (match_operand:VI1_AVX2 2 "const0_operand") + (const_int 1)] + UNSPEC_PCMP))] + UNSPEC_MOVMSK)))] + "TARGET_64BIT && TARGET_SSE2" + "#" + "&& 1" + [(set (match_dup 0) + (sign_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))] + "" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "ssemov") + (set (attr "prefix_data16") + (if_then_else + (match_test "TARGET_AVX") + (const_string "*") + (const_string "1"))) + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "SI")]) + +(define_insn_and_split "*pmovsk_kmask_v16qi_avx512" + [(set (match_operand:SI 0 "register_operand") + (unspec:SI + [(vec_merge:V16QI + (match_operand:V16QI 2 "vector_all_ones_operand") + (match_operand:V16QI 3 "const0_operand") + (match_operand:HI 1 "register_operand"))] + UNSPEC_MOVMSK))] + "TARGET_SSE2 && ix86_pre_reload_split ()" + "#" + "&& 1" + [(set (match_dup 0) + (zero_extend:SI (match_dup 1)))]) + +(define_insn_and_split "*pmovsk_mask_v32qi_avx512" + [(set (match_operand:SI 0 "register_operand") + (unspec:SI + [(vec_merge:V32QI + 
(match_operand:V32QI 2 "vector_all_ones_operand") + (match_operand:V32QI 3 "const0_operand") + (match_operand:SI 1 "register_operand"))] + UNSPEC_MOVMSK))] + "TARGET_SSE2 && ix86_pre_reload_split ()" + "#" + "&& 1" + [(set (match_dup 0) + (match_dup 1))]) + ;; Optimize pxor/pcmpeqb/pmovmskb/cmp 0xffff to ptest. (define_mode_attr vi1avx2const [(V32QI "0xffffffff") (V16QI "0xffff")]) @@ -22025,6 +22170,47 @@ (define_split (match_dup 0)] UNSPEC_PTEST))]) +(define_insn_and_split "*pmovsk_mask_cmp_<mode>_avx512" + [(set (reg:CCZ FLAGS_REG) + (compare:CCZ + (unspec:SI + [(vec_merge:VI1_AVX2 + (match_operand:VI1_AVX2 0 "vector_all_ones_operand") + (match_operand:VI1_AVX2 3 "const0_operand") + (match_operand:<avx512fmaskmode> 1 "register_operand"))] + UNSPEC_MOVMSK) + (match_operand 2 "const_int_operand")))] + "TARGET_AVX512VL && UINTVAL (operands[2]) <= <vi1avx2const>" + "#" + "&& 1" + [(set (reg:CCZ FLAGS_REG) + (compare:CCZ + (match_dup 1) + (match_dup 2)))] + "operands[2] = gen_int_mode (UINTVAL (operands[2]), <avx512fmaskmode>mode);") + +(define_insn_and_split "*pmovsk_ptest_<mode>_avx512" + [(set (reg:CCZ FLAGS_REG) + (compare:CCZ + (unspec:SI + [(vec_merge:VI1_AVX2 + (match_operand:VI1_AVX2 3 "vector_all_ones_operand") + (match_operand:VI1_AVX2 4 "const0_operand") + (unspec:<avx512fmaskmode> + [(match_operand:VI1_AVX2 0 "vector_operand") + (match_operand:VI1_AVX2 1 "const0_operand") + (const_int 0)] + UNSPEC_PCMP))] + UNSPEC_MOVMSK) + (match_operand 2 "const_int_operand")))] + "TARGET_AVX512VL && (INTVAL (operands[2]) == (int) (<vi1avx2const>))" + "#" + "&& 1" + [(set (reg:CCZ FLAGS_REG) + (unspec:CCZ [(match_dup 0) + (match_dup 0)] + UNSPEC_PTEST))]) + (define_expand "sse2_maskmovdqu" [(set (match_operand:V16QI 0 "memory_operand") (unspec:V16QI [(match_operand:V16QI 1 "register_operand") -- 2.31.1