Add V2QImode shift operations and split them into two synthesized QImode operations (high byte and low byte) on integer registers.
Also robustify arithmetic split patterns. 2022-01-13 Uroš Bizjak <ubiz...@gmail.com> gcc/ChangeLog: PR target/103861 * config/i386/i386.md (*ashlqi_ext<mode>_2): New insn pattern. (*<any_shiftrt:insn>qi_ext<mode>_2): Ditto. * config/i386/mmx.md (<any_shift:insn>v2qi3): New insn_and_split pattern. gcc/testsuite/ChangeLog: PR target/103861 * gcc.target/i386/pr103861.c (shl,ashr,lshr): New tests. Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}. Pushed to master. Uros.
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index bcaaa4993b1..c2acb1dbd90 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -12413,6 +12413,54 @@ (const_string "*"))) (set_attr "mode" "<MODE>")]) +(define_insn "*ashlqi_ext<mode>_2" + [(set (zero_extract:SWI248 + (match_operand:SWI248 0 "register_operand" "+Q") + (const_int 8) + (const_int 8)) + (subreg:SWI248 + (ashift:QI + (subreg:QI + (zero_extract:SWI248 + (match_operand:SWI248 1 "register_operand" "0") + (const_int 8) + (const_int 8)) 0) + (match_operand:QI 2 "nonmemory_operand" "cI")) 0)) + (clobber (reg:CC FLAGS_REG))] + "/* FIXME: without this LRA can't reload this pattern, see PR82524. */ + rtx_equal_p (operands[0], operands[1])" +{ + switch (get_attr_type (insn)) + { + case TYPE_ALU: + gcc_assert (operands[2] == const1_rtx); + return "add{b}\t%h0, %h0"; + + default: + if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + return "sal{b}\t%h0"; + else + return "sal{b}\t{%2, %h0|%h0, %2}"; + } +} + [(set (attr "type") + (cond [(and (match_test "TARGET_DOUBLE_WITH_ADD") + (match_operand 2 "const1_operand")) + (const_string "alu") + ] + (const_string "ishift"))) + (set (attr "length_immediate") + (if_then_else + (ior (eq_attr "type" "alu") + (and (eq_attr "type" "ishift") + (and (match_operand 2 "const1_operand") + (ior (match_test "TARGET_SHIFT1") + (match_test "optimize_function_for_size_p (cfun)"))))) + (const_string "0") + (const_string "*"))) + (set_attr "mode" "QI")]) + ;; See comment above `ashl<mode>3' about how this works. 
(define_expand "<insn><mode>3" @@ -13143,6 +13191,39 @@ (const_string "0") (const_string "*"))) (set_attr "mode" "<MODE>")]) + +(define_insn "*<insn>qi_ext<mode>_2" + [(set (zero_extract:SWI248 + (match_operand:SWI248 0 "register_operand" "+Q") + (const_int 8) + (const_int 8)) + (subreg:SWI248 + (any_shiftrt:QI + (subreg:QI + (zero_extract:SWI248 + (match_operand:SWI248 1 "register_operand" "0") + (const_int 8) + (const_int 8)) 0) + (match_operand:QI 2 "nonmemory_operand" "cI")) 0)) + (clobber (reg:CC FLAGS_REG))] + "/* FIXME: without this LRA can't reload this pattern, see PR82524. */ + rtx_equal_p (operands[0], operands[1])" +{ + if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + return "<shift>{b}\t%h0"; + else + return "<shift>{b}\t{%2, %h0|%h0, %2}"; +} + [(set_attr "type" "ishift") + (set (attr "length_immediate") + (if_then_else + (and (match_operand 2 "const1_operand") + (ior (match_test "TARGET_SHIFT1") + (match_test "optimize_function_for_size_p (cfun)"))) + (const_string "0") + (const_string "*"))) + (set_attr "mode" "QI")]) ;; Rotate instructions diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index 3d99a5e851b..782da220f98 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -1657,7 +1657,8 @@ (neg:V2QI (match_operand:V2QI 1 "general_reg_operand"))) (clobber (reg:CC FLAGS_REG))] - "reload_completed" + "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && reload_completed" [(parallel [(set (strict_low_part (match_dup 0)) (neg:QI (match_dup 1))) @@ -1683,7 +1684,8 @@ (neg:V2QI (match_operand:V2QI 1 "sse_reg_operand"))) (clobber (reg:CC FLAGS_REG))] - "reload_completed" + "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && TARGET_SSE2 && reload_completed" [(set (match_dup 0) (match_dup 2)) (set (match_dup 0) (minus:V16QI (match_dup 0) (match_dup 1)))] @@ -1757,7 +1759,8 @@ (match_operand:V2QI 1 "general_reg_operand") (match_operand:V2QI 2 
"general_reg_operand"))) (clobber (reg:CC FLAGS_REG))] - "reload_completed" + "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && reload_completed" [(parallel [(set (strict_low_part (match_dup 0)) (plusminus:QI (match_dup 1) (match_dup 2))) @@ -1790,7 +1793,8 @@ (match_operand:V2QI 1 "sse_reg_operand") (match_operand:V2QI 2 "sse_reg_operand"))) (clobber (reg:CC FLAGS_REG))] - "TARGET_SSE2 && reload_completed" + "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && TARGET_SSE2 && reload_completed" [(set (match_dup 0) (plusminus:V16QI (match_dup 1) (match_dup 2)))] { @@ -2387,6 +2391,38 @@ (const_string "0"))) (set_attr "mode" "TI")]) +(define_insn_and_split "<insn>v2qi3" + [(set (match_operand:V2QI 0 "register_operand" "=Q") + (any_shift:V2QI + (match_operand:V2QI 1 "register_operand" "0") + (match_operand:QI 2 "nonmemory_operand" "cI"))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)" + "#" + "&& reload_completed" + [(parallel + [(set (zero_extract:HI (match_dup 3) (const_int 8) (const_int 8)) + (subreg:HI + (any_shift:QI + (subreg:QI + (zero_extract:HI (match_dup 4) + (const_int 8) + (const_int 8)) 0) + (match_dup 2)) 0)) + (clobber (reg:CC FLAGS_REG))]) + (parallel + [(set (strict_low_part (match_dup 0)) + (any_shift:QI (match_dup 1) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] +{ + operands[4] = lowpart_subreg (HImode, operands[1], V2QImode); + operands[3] = lowpart_subreg (HImode, operands[0], V2QImode); + operands[1] = lowpart_subreg (QImode, operands[1], V2QImode); + operands[0] = lowpart_subreg (QImode, operands[0], V2QImode); +} + [(set_attr "type" "multi") + (set_attr "mode" "QI")]) + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; Parallel integral comparisons diff --git a/gcc/testsuite/gcc.target/i386/pr103861.c b/gcc/testsuite/gcc.target/i386/pr103861.c index 158717645b6..064b617774b 100644 --- 
a/gcc/testsuite/gcc.target/i386/pr103861.c +++ b/gcc/testsuite/gcc.target/i386/pr103861.c @@ -3,6 +3,7 @@ /* { dg-options "-O2 -dp" } */ typedef char __v2qi __attribute__ ((__vector_size__ (2))); +typedef unsigned char __v2qu __attribute__ ((__vector_size__ (2))); __v2qi and (__v2qi a, __v2qi b) { return a & b; }; @@ -20,4 +21,10 @@ __v2qi minus (__v2qi a, __v2qi b) { return a - b; }; __v2qi neg (__v2qi a) { return -a; }; +__v2qi shl (__v2qi a, int b) { return a << b; }; + +__v2qi ashr (__v2qi a, int b) { return a >> b; }; + +__v2qu lshr (__v2qu a, int b) { return a >> b; }; + /* { dg-final { scan-assembler-not "insvhi" } } */