https://gcc.gnu.org/g:e715204f203d318524ae86f3f2a1e8d5d7cb08dc

commit r15-930-ge715204f203d318524ae86f3f2a1e8d5d7cb08dc
Author: Uros Bizjak <ubiz...@gmail.com>
Date:   Thu May 30 21:27:42 2024 +0200

    i386: Rewrite bswaphi2 handling [PR115102]
    
    Introduce *bswaphi2 instruction pattern and enable bswaphi2 expander
    also for non-movbe targets.  The testcase:
    
    unsigned short bswap8 (unsigned short val)
    {
      return ((val & 0xff00) >> 8) | ((val & 0xff) << 8);
    }
    
    now expands through bswaphi2 named expander.
    
    Rewrite the bswaphi_lowpart insn pattern as bswaphisi2_lowpart, in an RTX
    form that the combine pass can use to simplify:
    
    Trying 6, 9, 8 -> 10:
        6: r99:SI=bswap(r103:SI)
        9: {r107:SI=r103:SI&0xffffffffffff0000;clobber flags:CC;}
          REG_DEAD r103:SI
          REG_UNUSED flags:CC
        8: {r106:SI=r99:SI 0>>0x10;clobber flags:CC;}
          REG_DEAD r99:SI
          REG_UNUSED flags:CC
       10: {r104:SI=r106:SI|r107:SI;clobber flags:CC;}
          REG_DEAD r107:SI
          REG_DEAD r106:SI
          REG_UNUSED flags:CC
    
    Successfully matched this instruction:
    (set (reg:SI 104 [ _8 ])
        (ior:SI (and:SI (reg/v:SI 103 [ val ])
                (const_int -65536 [0xffffffffffff0000]))
            (lshiftrt:SI (bswap:SI (reg/v:SI 103 [ val ]))
                (const_int 16 [0x10]))))
    allowing combination of insns 6, 8, 9 and 10
    
    when compiling the following testcase:
    
    unsigned int bswap8 (unsigned int val)
    {
      return (val & 0xffff0000) | ((val & 0xff00) >> 8) | ((val & 0xff) << 8);
    }
    
    to produce:
    
            movl    %edi, %eax
            xchgb   %ah, %al
            ret
    
    The expansion now always goes through a clobberless form of the bswaphi
    instruction.  The instruction is conditionally converted to a rotate in
    the peephole2 pass.  This significantly simplifies the attributes of the
    bswaphisi2_lowpart insn pattern.
    
            PR target/115102
    
    gcc/ChangeLog:
    
            * config/i386/i386.md (bswaphi2): Also enable for !TARGET_MOVBE.
            (*bswaphi2): New insn pattern.
            (bswaphisi2_lowpart): Rename from bswaphi_lowpart.  Rewrite
            insn RTX to match the expected form of the combine pass.
            Remove rol{w} alternative and corresponding attributes.
            (bswaphisi2_lowpart peephole2): New peephole2 pattern to
            conditionally convert bswaphisi2_lowpart to rotlhi3_1_slp.
            (bswapsi2): Update expander for rename.
            (rotlhi3_1_slp splitter): Conditionally split to bswaphi2.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/i386/pr115102.c: New test.

Diff:
---
 gcc/config/i386/i386.md                  | 77 +++++++++++++++++++++-----------
 gcc/testsuite/gcc.target/i386/pr115102.c | 10 +++++
 2 files changed, 60 insertions(+), 27 deletions(-)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index c162cd42386..375654cf74e 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -17210,9 +17210,7 @@
   (clobber (reg:CC FLAGS_REG))]
  "reload_completed
   && (TARGET_USE_XCHGB || optimize_function_for_size_p (cfun))"
- [(parallel [(set (strict_low_part (match_dup 0))
-                 (bswap:HI (match_dup 0)))
-            (clobber (reg:CC FLAGS_REG))])])
+ [(set (match_dup 0) (bswap:HI (match_dup 0)))])
 
 ;; Rotations through carry flag
 (define_insn "rcrsi2"
@@ -20730,12 +20728,11 @@
     operands[1] = force_reg (SImode, operands[1]);
   else
     {
-      rtx x = operands[0];
+      rtx x = gen_reg_rtx (SImode);
 
-      emit_move_insn (x, operands[1]);
-      emit_insn (gen_bswaphi_lowpart (gen_lowpart (HImode, x)));
+      emit_insn (gen_bswaphisi2_lowpart (x, operands[1]));
       emit_insn (gen_rotlsi3 (x, x, GEN_INT (16)));
-      emit_insn (gen_bswaphi_lowpart (gen_lowpart (HImode, x)));
+      emit_insn (gen_bswaphisi2_lowpart (operands[0], x));
       DONE;
     }
 })
@@ -20767,7 +20764,11 @@
 (define_expand "bswaphi2"
   [(set (match_operand:HI 0 "register_operand")
        (bswap:HI (match_operand:HI 1 "nonimmediate_operand")))]
-  "TARGET_MOVBE")
+  ""
+{
+  if (!TARGET_MOVBE)
+    operands[1] = force_reg (HImode, operands[1]);
+})
 
 (define_insn "*bswaphi2_movbe"
   [(set (match_operand:HI 0 "nonimmediate_operand" "=Q,r,m")
@@ -20788,33 +20789,55 @@
    (set_attr "bdver1_decode" "double,*,*")
    (set_attr "mode" "QI,HI,HI")])
 
+(define_insn "*bswaphi2"
+  [(set (match_operand:HI 0 "register_operand" "=Q")
+       (bswap:HI (match_operand:HI 1 "register_operand" "0")))]
+  "!TARGET_MOVBE"
+  "xchg{b}\t{%h0, %b0|%b0, %h0}"
+  [(set_attr "type" "imov")
+   (set_attr "pent_pair" "np")
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "double")
+   (set_attr "bdver1_decode" "double")
+   (set_attr "mode" "QI")])
+
 (define_peephole2
   [(set (match_operand:HI 0 "general_reg_operand")
        (bswap:HI (match_dup 0)))]
-  "TARGET_MOVBE
-   && !(TARGET_USE_XCHGB || optimize_function_for_size_p (cfun))
+  "!(TARGET_USE_XCHGB ||
+     TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
    && peep2_regno_dead_p (0, FLAGS_REG)"
   [(parallel [(set (match_dup 0) (rotate:HI (match_dup 0) (const_int 8)))
              (clobber (reg:CC FLAGS_REG))])])
 
-(define_insn "bswaphi_lowpart"
-  [(set (strict_low_part (match_operand:HI 0 "register_operand" "+Q,r"))
-       (bswap:HI (match_dup 0)))
-   (clobber (reg:CC FLAGS_REG))]
+(define_insn "bswaphisi2_lowpart"
+  [(set (match_operand:SI 0 "register_operand" "=Q")
+       (ior:SI (and:SI (match_operand:SI 1 "register_operand" "0")
+                       (const_int -65536))
+               (lshiftrt:SI (bswap:SI (match_dup 1))
+                            (const_int 16))))]
   ""
-  "@
-    xchg{b}\t{%h0, %b0|%b0, %h0}
-    rol{w}\t{$8, %0|%0, 8}"
-  [(set (attr "preferred_for_size")
-     (cond [(eq_attr "alternative" "0")
-             (symbol_ref "true")]
-          (symbol_ref "false")))
-   (set (attr "preferred_for_speed")
-     (cond [(eq_attr "alternative" "0")
-             (symbol_ref "TARGET_USE_XCHGB")]
-          (symbol_ref "!TARGET_USE_XCHGB")))
-   (set_attr "length" "2,4")
-   (set_attr "mode" "QI,HI")])
+  "xchg{b}\t{%h0, %b0|%b0, %h0}"
+  [(set_attr "type" "imov")
+   (set_attr "pent_pair" "np")
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "double")
+   (set_attr "bdver1_decode" "double")
+   (set_attr "mode" "QI")])
+
+(define_peephole2
+  [(set (match_operand:SI 0 "general_reg_operand")
+       (ior:SI (and:SI (match_dup 0)
+                       (const_int -65536))
+               (lshiftrt:SI (bswap:SI (match_dup 0))
+                            (const_int 16))))]
+  "!(TARGET_USE_XCHGB ||
+     TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+   && peep2_regno_dead_p (0, FLAGS_REG)"
+  [(parallel [(set (strict_low_part (match_dup 0))
+                                   (rotate:HI (match_dup 0) (const_int 8)))
+             (clobber (reg:CC FLAGS_REG))])]
+  "operands[0] = gen_lowpart (HImode, operands[0]);")
 
 (define_expand "paritydi2"
   [(set (match_operand:DI 0 "register_operand")
diff --git a/gcc/testsuite/gcc.target/i386/pr115102.c b/gcc/testsuite/gcc.target/i386/pr115102.c
new file mode 100644
index 00000000000..e603980b3c8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr115102.c
@@ -0,0 +1,10 @@
+/* PR target/115102 */
+/* { dg-do compile } */
+/* { dg-options "-Os -march=x86-64 -dp" } */
+
+unsigned int bswap8 (unsigned int val)
+{
+  return (val & 0xffff0000) | ((val & 0xff00) >> 8) | ((val & 0xff) << 8);
+}
+
+/* { dg-final { scan-assembler "bswaphisi2_lowpart" } } */

Reply via email to