https://gcc.gnu.org/g:74d5e928d5fe9804fdacb393d36d1be58feb60fb

commit r17-494-g74d5e928d5fe9804fdacb393d36d1be58feb60fb
Author: Roger Sayle <[email protected]>
Date:   Wed May 13 12:31:12 2026 +0100

    x86: Shorter load immediate constants with -Oz
    
    This patch adds two peephole2 patterns to i386.md to decrease the size
    of some integer loads.  These replace "movl $const, %eax" (5 bytes)
    with "xorl %eax, %eax" followed by either "movb $const,%al" or
    "movb $const,%ah" (together 4 bytes), for suitable constants and
    suitable general registers, when the flags register is dead.
    
    Ideally, modern Intel and AMD processors can recognize these sequences
    during instruction decode (avoiding any partial register stall in
    the same way they avoid the false dependence for the xorl), and
    internally generate a single uop, treating these bytes like an
    alternate instruction encoding.
    
    2026-05-13  Roger Sayle  <[email protected]>
                Uros Bizjak  <[email protected]>
    
    gcc/ChangeLog
            PR target/32803
            * config/i386/i386.md (peephole2): Don't transform xorl;movb into
            movzb with -Oz.
            (peephole2): Convert movl into xorl;movb (strict_low_part) with -Oz.
            (peephole2): Likewise, convert movl into xorl;movb [abcd]h with -Oz.
    
    gcc/testsuite/ChangeLog
            PR target/32803
            * gcc.target/i386/pr32803-2.c: New test case.
            * gcc.target/i386/pr32803-3.c: Likewise.

Diff:
---
 gcc/config/i386/i386.md                   | 33 ++++++++++++++++++++++++++++++-
 gcc/testsuite/gcc.target/i386/pr32803-2.c | 25 +++++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr32803-3.c | 24 ++++++++++++++++++++++
 3 files changed, 81 insertions(+), 1 deletion(-)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index b4e397bc925b..a486ea3d79d5 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -4985,7 +4985,8 @@
   "REGNO (operands[0]) == REGNO (operands[1])
    && (<SWI48:MODE>mode != SImode
        || !TARGET_ZERO_EXTEND_WITH_AND
-       || !optimize_function_for_speed_p (cfun))"
+       || !optimize_function_for_speed_p (cfun))
+   && !(optimize_insn_for_size_p () && optimize_size > 1)"
   [(set (match_dup 0) (zero_extend:SWI48 (match_dup 2)))])
 
 ;; Likewise, but preserving FLAGS_REG.
@@ -4998,6 +4999,36 @@
        || !TARGET_ZERO_EXTEND_WITH_AND
        || !optimize_function_for_speed_p (cfun))"
   [(set (match_dup 0) (zero_extend:SWI48 (match_dup 2)))])
+
+;; With -Oz, convert mov eax,200 (5 bytes) to xor eax,eax; mov al,200
+;; (4 bytes) when the flags register is dead.
+(define_peephole2
+  [(set (match_operand:SWI48 0 "any_QIreg_operand")
+       (match_operand:SWI48 1 "const_int_operand"))]
+  "optimize_insn_for_size_p () && optimize_size > 1
+   && IN_RANGE (INTVAL (operands[1]), 128, 255)
+   && peep2_regno_dead_p (0, FLAGS_REG)"
+  [(parallel [(set (match_dup 0) (const_int 0))
+             (clobber (reg:CC FLAGS_REG))])
+   (set (strict_low_part (match_dup 2)) (match_dup 3))]
+{
+  operands[2] = gen_lowpart (QImode, operands[0]);
+  operands[3] = gen_int_mode (INTVAL (operands[1]), QImode);
+})
+
+;; With -Oz, convert mov eax,512 (5 bytes) to xor eax,eax; mov ah,2
+;; (4 bytes) when the flags register is dead.
+(define_peephole2
+  [(set (match_operand:SWI48 0 "QIreg_operand")
+       (match_operand:SWI48 1 "const_int_operand"))]
+  "optimize_insn_for_size_p () && optimize_size > 1
+   && (INTVAL (operands[1]) & ~0xff00) == 0
+   && peep2_regno_dead_p (0, FLAGS_REG)"
+  [(parallel [(set (match_dup 0) (const_int 0))
+             (clobber (reg:CC FLAGS_REG))])
+   (set (zero_extract:SWI48 (match_dup 0) (const_int 8) (const_int 8))
+        (match_dup 2))]
+  "operands[2] = gen_int_mode (INTVAL (operands[1]) >> 8, QImode);")
 
 ;; Sign extension instructions
 
diff --git a/gcc/testsuite/gcc.target/i386/pr32803-2.c b/gcc/testsuite/gcc.target/i386/pr32803-2.c
new file mode 100644
index 000000000000..7fc8f5aa18e0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr32803-2.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-Oz" } */
+
+int foo()
+{
+  return 200;
+}
+
+long fool()
+{
+  return 200;
+}
+
+int bar()
+{
+  return 512;
+}
+
+long barl()
+{
+  return 512;
+}
+/* { dg-final { scan-assembler-times "xorl\[ \t\]*%eax, %eax" 4 } } */
+/* { dg-final { scan-assembler-times "movb\[ \t\]*\\\$-56, %al" 2 } } */
+/* { dg-final { scan-assembler-times "movb\[ \t\]*\\\$2, %ah" 2 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr32803-3.c b/gcc/testsuite/gcc.target/i386/pr32803-3.c
new file mode 100644
index 000000000000..052c7b3c7ed7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr32803-3.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-Os" } */
+
+int foo()
+{
+  return 200;
+}
+
+long fool()
+{
+  return 200;
+}
+
+int bar()
+{
+  return 512;
+}
+
+long barl()
+{
+  return 512;
+}
+/* { dg-final { scan-assembler-times "movl\[\\t \]*\\\$200, %eax" 2 } } */
+/* { dg-final { scan-assembler-times "movl\[\\t \]*\\\$512, %eax" 2 } } */

Reply via email to