[PATCH: RL78] Optimize libgcc routines using clrw and clrb

Kaushik Phatak Tue, 05 Apr 2016 01:09:56 -0700

Hi,
Please find below a patch that optimizes libgcc routines for the RL78 target.


This is similar to my earlier patch submitted here,
https://gcc.gnu.org/ml/gcc-patches/2016-02/msg00415.html

The patch optimizes the loading of the immediate value 0x00 by replacing the
movw/mov instruction with the smaller clrw/clrb instruction.
The clrw and clrb instructions generate only 1 byte of opcode, as compared to
3 or 2 bytes for movw and mov respectively.

Kindly review this patch and let me know what you think.
This has been regression tested for rl78 -msim.

Best Regards,
Kaushik

P.S. Kindly ignore any disclaimers at the end of this e-mail, as they are
auto-inserted.
Apologies for the same.

2016-04-06  Kaushik Phatak <kaushik.pha...@kpit.com>

        * config/rl78/bit-count.S: Use clrw/clrb where possible.
        * config/rl78/cmpsi2.S: Likewise.
        * config/rl78/divmodhi.S: Likewise.
        * config/rl78/divmodsi.S: Likewise.
        * config/rl78/fpbit-sf.S: Likewise.
        * config/rl78/fpmath-sf.S: Likewise.
        * config/rl78/mulsi3.S: Likewise.

Index: libgcc/config/rl78/bit-count.S
===================================================================
--- libgcc/config/rl78/bit-count.S      (revision 3174)
+++ libgcc/config/rl78/bit-count.S      (working copy)
@@ -139,7 +139,7 @@
        xor1    cy, a.5
        xor1    cy, a.6
        xor1    cy, a.7
-       movw    ax, #0
+       clrw    ax
        bnc     $1f
        incw    ax
 1:
@@ -190,7 +190,7 @@
        movw    ax, sp
        addw    ax, #4
        movw    hl, ax
-       mov     a, #0
+       clrb    a
 1:
        xch     a, b
        mov     a, [hl]
@@ -207,7 +207,7 @@
        bnz     $1b
 
        mov     x, a
-       mov     a, #0
+       clrb    a
        movw    r8, ax
        ret     
 END_FUNC       ___popcountqi_internal
Index: libgcc/config/rl78/cmpsi2.S
===================================================================
--- libgcc/config/rl78/cmpsi2.S (revision 3174)
+++ libgcc/config/rl78/cmpsi2.S (working copy)
@@ -162,8 +162,8 @@
 
        ;; They differ.  Subtract *S2 from *S1 and return as the result.
        mov     x, a
-       mov     a, #0
-       mov     r9, #0
+       clrb    a
+       clrb    r9
        subw    ax, r8
 1:
        movw    r8, ax
Index: libgcc/config/rl78/divmodhi.S
===================================================================
--- libgcc/config/rl78/divmodhi.S       (revision 3174)
+++ libgcc/config/rl78/divmodhi.S       (working copy)
@@ -576,7 +576,7 @@
 
 .macro NEG_AX
        movw    hl, ax
-       movw    ax, #0
+       clrw    ax
        subw    ax, [hl]
        movw    [hl], ax
 .endm
Index: libgcc/config/rl78/divmodsi.S
===================================================================
--- libgcc/config/rl78/divmodsi.S       (revision 3174)
+++ libgcc/config/rl78/divmodsi.S       (working copy)
@@ -952,10 +952,10 @@
 
 .macro NEG_AX
        movw    hl, ax
-       movw    ax, #0
+       clrw    ax
        subw    ax, [hl]
        movw    [hl], ax
-       movw    ax, #0
+       clrw    ax
        sknc
        decw    ax
        subw    ax, [hl+2]
Index: libgcc/config/rl78/fpbit-sf.S
===================================================================
--- libgcc/config/rl78/fpbit-sf.S       (revision 3174)
+++ libgcc/config/rl78/fpbit-sf.S       (working copy)
@@ -117,7 +117,7 @@
        call    $!__int_iszero
        bnz     $2f
        ;; At this point, both args are zero.
-       mov     a, #0
+       clrb    a
        ret
 
 2:
@@ -151,7 +151,7 @@
        bc      $ybig_cmpsf     ; branch if X < Y
        bnz     $xbig_cmpsf     ; branch if X > Y
 
-       mov     a, #0
+       clrb    a
        ret
 
 xbig_cmpsf:                    ; |X| > |Y| so return A = 1 if pos, 0xff if neg
@@ -285,7 +285,7 @@
        movw    r10, #0x7fff
        ret
        ;; -inf
-2:     mov     r8, #0
+2:     clrb    r8
        mov     r10, #0x8000
        ret
        
@@ -302,10 +302,10 @@
        clr1    a.7
        call    $!__int_fixunssfsi
 
-       movw    ax, #0
+       clrw    ax
        subw    ax, r8
        movw    r8, ax
-       movw    ax, #0
+       clrw    ax
         sknc
         decw    ax
         subw    ax, r10
@@ -410,7 +410,7 @@
        set1    a.7
 
        ;; Clear B:C:R12:R13
-       movw    bc, #0
+       clrw    bc
        movw    r12, #0
 
        ;; Shift bits from the mantissa (A:X:R10) into (B:C:R12:R13),
@@ -482,10 +482,10 @@
 
        ;; If negative convert to positive ...
        movw    hl, ax
-       movw    ax, #0
+       clrw    ax
        subw    ax, bc
        movw    bc, ax
-       movw    ax, #0
+       clrw    ax
        sknc
        decw    ax
        subw    ax, hl
@@ -533,7 +533,7 @@
        bnz     $1f
        movw    ax, bc
        cmpw    ax, #0
-       movw    ax, #0
+       clrw    ax
        bnz     $1f
 
        ;; Return 0.0
Index: libgcc/config/rl78/fpmath-sf.S
===================================================================
--- libgcc/config/rl78/fpmath-sf.S      (revision 3174)
+++ libgcc/config/rl78/fpmath-sf.S      (working copy)
@@ -87,7 +87,7 @@
        or      a, #0x80
        mov     A_FRAC_H, a
 
-       mov     a, #0
+       clrb    a
        mov     A_FRAC_HH, a
 
        ;; rounding-bit-shift
@@ -273,7 +273,7 @@
        ;; "zero out" b
        movw    ax, A_EXP
        movw    B_EXP, ax
-       movw    ax, #0
+       clrw    ax
        movw    B_FRAC_L, ax
        movw    B_FRAC_H, ax
        br      $5f
@@ -281,7 +281,7 @@
        ;; "zero out" a
        movw    ax, B_EXP
        movw    A_EXP, ax
-       movw    ax, #0
+       clrw    ax
        movw    A_FRAC_L, ax
        movw    A_FRAC_H, ax
 
@@ -379,7 +379,7 @@
        bt      a.7, $.L706
        
        ;; subtraction was positive
-       mov     a, #0
+       clrb    a
        mov     A_SIGN, a
        br      $.L712
 
@@ -543,7 +543,7 @@
        or      a, A_FRAC_H
        or      a, A_FRAC_HH
        bnz     $1f
-       movw    ax, #0
+       clrw    ax
        movw    A_EXP, ax
 1:     
        mov     a, A_FRAC_H
@@ -682,7 +682,7 @@
        movw    ax, B_FRAC_H
        movw    [sp+10], ax
 
-       movw    ax, #0
+       clrw    ax
        movw    [sp+4], ax
        movw    [sp+6], ax
        movw    [sp+12], ax
@@ -867,7 +867,7 @@
        and     a, #0x80
        mov     r11, a
        movw    r8, #0
-       mov     r10, #0
+       clrb    r10
        ret
        
 1:     
@@ -930,7 +930,7 @@
        movw    ax, B_FRAC_H
        movw    [sp+10], ax
 
-       movw    ax, #0
+       clrw    ax
        movw    [sp+0], ax
        movw    [sp+2], ax
        movw    [sp+12], ax
Index: libgcc/config/rl78/mulsi3.S
===================================================================
--- libgcc/config/rl78/mulsi3.S (revision 3174)
+++ libgcc/config/rl78/mulsi3.S (working copy)
@@ -148,7 +148,7 @@
        movw    ax, bc
 
 .Lmul_hisi_top:
-       movw    bc, #0
+       clrw    bc
 
 .Lmul_hisi_loop:
        shrw    ax, 1

rl78_libgcc_optimize_clrw.patch
Description: rl78_libgcc_optimize_clrw.patch

[PATCH: RL78] Optimize libgcc routines using clrw and clrb

Reply via email to