https://gcc.gnu.org/g:65cbb24fe6bb418b1736296e9c23d782f169cae4

commit r16-6220-g65cbb24fe6bb418b1736296e9c23d782f169cae4
Author: Georg-Johann Lay <[email protected]>
Date:   Tue Dec 16 20:49:31 2025 +0100

    AVR: Tweak shift execution times in some cases.
    
    When the tail reg (last register) in a shift is an upper register,
    then inserting a sequence of 0s and a 1 into the tail register
    only takes 2 instructions.  The preparation will be one instruction
    longer, but the loop body will be one instruction shorter, saving
    count-1 cycles.
    
    For example uint32_t R22 >> 5 will turn from:
    
        ldi scratch, 5
    1:  lsr r25
        ror r24
        ror r23
        ror r22
        dec scratch
        brne 1b
    
    to:
    
        andi r22,-32   ; Set lower 5 bits to 0.
        ori r22,16     ; Set bit 4 to 1.
        ;; Now r22 = 0b***10000
    1:  lsr r25
        ror r24
        ror r23
        ror r22
        brcc 1b        ; Carry will be 0, 0, 0, 0, 1.
    
    gcc/
            * config/avr/avr.cc (avr_out_shift_with_cnt): Tweak
            execution time by count-1 cycles in some cases.

Diff:
---
 gcc/config/avr/avr.cc | 44 ++++++++++++++++++++++++++++++++++++++------
 1 file changed, 38 insertions(+), 6 deletions(-)

diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index dd1bfbcdfcbc..26de3129aee4 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -6961,8 +6961,11 @@ avr_out_shift_with_cnt (rtx_code code, rtx_insn *insn, rtx operands[],
   bool second_label = true;
   bool saved_in_tmp = false;
   bool use_zero_reg = false;
+  bool tail_bits = false;
   const int t_len = GET_MODE_SIZE (GET_MODE (operands[0]));
-  rtx op[5];
+  const int regno = REGNO (operands[0]);
+  const int tail_regno = regno + (code == ASHIFT ? t_len - 1 : 0);
+  rtx op[6];
 
   op[0] = operands[0];
   op[1] = operands[1];
@@ -6988,11 +6991,13 @@ avr_out_shift_with_cnt (rtx_code code, rtx_insn *insn, rtx operands[],
       if (count <= 0)
        return "";
 
-      if (count < 8 && !scratch)
+      if (count < 8 && tail_regno >= REG_16)
+       tail_bits = true;
+      else if (count < 8 && !scratch)
        use_zero_reg = true;
 
       if (optimize_size)
-       max_len = t_len + (scratch ? 3 : (use_zero_reg ? 4 : 5));
+       max_len = t_len + (scratch || tail_bits ? 3 : (use_zero_reg ? 4 : 5));
 
       if (t_len * count <= max_len)
        {
@@ -7004,7 +7009,27 @@ avr_out_shift_with_cnt (rtx_code code, rtx_insn *insn, rtx operands[],
          return "";
        }
 
-      if (scratch)
+      if (tail_bits)
+       {
+         /* The tail register (the last one in a multi-byte shift) is
+            an upper register, so we can insert a stop mask into it.
+            This will cost 2 instructions, but the loop body is one
+            instruction shorter.  That yields the same code size like
+            the "scratch" case but saves count-1 cycles.
+            The loop branch is a BRCC that sees count-1 zeros and then
+            a one to drop out of the loop.  */
+
+         op[3] = all_regs_rtx[tail_regno];
+         op[4] = gen_int_mode (code == ASHIFT
+                               ? 0xff >> count
+                               : 0xff << count, QImode);
+         op[5] = gen_int_mode (code == ASHIFT
+                               ? 0x80 >> (count - 1)
+                               : 0x01 << (count - 1), QImode);
+         avr_asm_len ("andi %3,%4" CR_TAB
+                      "ori %3,%5", op, plen, 2);
+       }
+      else if (scratch)
        {
          avr_asm_len ("ldi %3,%2", op, plen, 1);
        }
@@ -7065,8 +7090,15 @@ avr_out_shift_with_cnt (rtx_code code, rtx_insn *insn, rtx operands[],
   if (second_label)
     avr_asm_len ("2:", op, plen, 0);
 
-  avr_asm_len (use_zero_reg ? "lsr %3" : "dec %3", op, plen, 1);
-  avr_asm_len (second_label ? "brpl 1b" : "brne 1b", op, plen, 1);
+  if (tail_bits)
+    {
+      avr_asm_len ("brcc 1b", op, plen, 1);
+    }
+  else
+    {
+      avr_asm_len (use_zero_reg ? "lsr %3" : "dec %3", op, plen, 1);
+      avr_asm_len (second_label ? "brpl 1b" : "brne 1b", op, plen, 1);
+    }
 
   if (saved_in_tmp)
     avr_asm_len ("mov %3,%4", op, plen, 1);

Reply via email to