[PATCH: RL78] Optimize libgcc routines using clrw and clrb

Kaushik Phatak Fri, 05 Feb 2016 04:57:44 -0800

Hi,
Please find below a simple patch which optimizes the loading of an immediate
value by using the clrw or clrb instruction when 0x00 is being loaded into a
register.
The patch replaces the movw/mov instruction with the smaller clrw/clrb
instruction.
The clrw and clrb instructions generate only 1 byte of opcode, as compared to
3 or 2 bytes for movw and mov.


With this patch there is a total code size improvement of about 94 bytes in
these libgcc routines.

The following routines have improved code size,
___mulsi3   : 2 bytes
___divsi3   : 20 bytes
___modsi3   : 20 bytes
___divhi3   : 10 bytes
___modhi3   : 10 bytes
___parityqi_internal : 2 bytes
__int_cmpsf : 2 bytes
___fixsfsi  : 5 bytes
___fixunssfsi : 2 bytes
___floatsisf  : 6 bytes
_int_unpack_sf : 1 byte
___addsf3 : 5 bytes
__rl78_int_pack_a_r8 : 2 bytes
___mulsf3  : 2 bytes
___divsf3  : 3 bytes
__gcc_bcmp :  2 bytes


I have also attached a draft version of a similar patch
(rl78_libgcc_optimize_draft.patch), which goes further: it removes movw
immediate loads to other saddr registers and replaces each of them with
2 instructions, e.g.
 START_FUNC ___modhi3
        ;; r8 = 4[sp] % 6[sp]
-       movw    de, #0
+       clrw    ax
+       movw    de,ax
        mov     a, [sp+5]

This patch improves code size by 1 byte for each such substitution; however,
it does add an extra clock cycle.

We may consider this patch if we are purely looking for code size
improvement, assuming the libraries are built with -Os. It shows a total
improvement of 134 bytes in code size.

Patch1: rl78_libgcc_optimize_clrw.patch - 94 bytes improvement in code size.
Patch2: rl78_libgcc_optimize_draft.patch - 134 bytes improvement in code size.

Kindly review this patch and let me know what you think.
This is regression tested for rl78 -msim.

Best Regards,
Kaushik

P.S. Kindly ignore any disclaimers at the end of this e-mail, as they are
auto-inserted. Apologies for the same.

2016-02-05  Kaushik Phatak <kaushik.pha...@kpit.com>

        * config/rl78/bit-count.S: Use clrw/clrb where possible.
        * config/rl78/cmpsi2.S: Likewise.
        * config/rl78/divmodhi.S: Likewise.
        * config/rl78/divmodsi.S: Likewise.
        * config/rl78/fpbit-sf.S: Likewise.
        * config/rl78/fpmath-sf.S: Likewise.
        * config/rl78/mulsi3.S: Likewise.
                
Index: libgcc/config/rl78/bit-count.S
===================================================================
--- libgcc/config/rl78/bit-count.S      (revision 3174)
+++ libgcc/config/rl78/bit-count.S      (working copy)
@@ -139,7 +139,7 @@
        xor1    cy, a.5
        xor1    cy, a.6
        xor1    cy, a.7
-       movw    ax, #0
+       clrw    ax
        bnc     $1f
        incw    ax
 1:
@@ -190,7 +190,7 @@
        movw    ax, sp
        addw    ax, #4
        movw    hl, ax
-       mov     a, #0
+       clrb    a
 1:
        xch     a, b
        mov     a, [hl]
@@ -207,7 +207,7 @@
        bnz     $1b
 
        mov     x, a
-       mov     a, #0
+       clrb    a
        movw    r8, ax
        ret     
 END_FUNC       ___popcountqi_internal
Index: libgcc/config/rl78/cmpsi2.S
===================================================================
--- libgcc/config/rl78/cmpsi2.S (revision 3174)
+++ libgcc/config/rl78/cmpsi2.S (working copy)
@@ -162,8 +162,8 @@
 
        ;; They differ.  Subtract *S2 from *S1 and return as the result.
        mov     x, a
-       mov     a, #0
-       mov     r9, #0
+       clrb    a
+       clrb    r9
        subw    ax, r8
 1:
        movw    r8, ax
Index: libgcc/config/rl78/divmodhi.S
===================================================================
--- libgcc/config/rl78/divmodhi.S       (revision 3174)
+++ libgcc/config/rl78/divmodhi.S       (working copy)
@@ -576,7 +576,7 @@
 
 .macro NEG_AX
        movw    hl, ax
-       movw    ax, #0
+       clrw    ax
        subw    ax, [hl]
        movw    [hl], ax
 .endm
Index: libgcc/config/rl78/divmodsi.S
===================================================================
--- libgcc/config/rl78/divmodsi.S       (revision 3174)
+++ libgcc/config/rl78/divmodsi.S       (working copy)
@@ -952,10 +952,10 @@
 
 .macro NEG_AX
        movw    hl, ax
-       movw    ax, #0
+       clrw    ax
        subw    ax, [hl]
        movw    [hl], ax
-       movw    ax, #0
+       clrw    ax
        sknc
        decw    ax
        subw    ax, [hl+2]
Index: libgcc/config/rl78/fpbit-sf.S
===================================================================
--- libgcc/config/rl78/fpbit-sf.S       (revision 3174)
+++ libgcc/config/rl78/fpbit-sf.S       (working copy)
@@ -117,7 +117,7 @@
        call    $!__int_iszero
        bnz     $2f
        ;; At this point, both args are zero.
-       mov     a, #0
+       clrb    a
        ret
 
 2:
@@ -151,7 +151,7 @@
        bc      $ybig_cmpsf     ; branch if X < Y
        bnz     $xbig_cmpsf     ; branch if X > Y
 
-       mov     a, #0
+       clrb    a
        ret
 
 xbig_cmpsf:                    ; |X| > |Y| so return A = 1 if pos, 0xff if neg
@@ -285,7 +285,7 @@
        movw    r10, #0x7fff
        ret
        ;; -inf
-2:     mov     r8, #0
+2:     clrb    r8
        mov     r10, #0x8000
        ret
        
@@ -302,10 +302,10 @@
        clr1    a.7
        call    $!__int_fixunssfsi
 
-       movw    ax, #0
+       clrw    ax
        subw    ax, r8
        movw    r8, ax
-       movw    ax, #0
+       clrw    ax
         sknc
         decw    ax
         subw    ax, r10
@@ -410,7 +410,7 @@
        set1    a.7
 
        ;; Clear B:C:R12:R13
-       movw    bc, #0
+       clrw    bc
        movw    r12, #0
 
        ;; Shift bits from the mantissa (A:X:R10) into (B:C:R12:R13),
@@ -482,10 +482,10 @@
 
        ;; If negative convert to positive ...
        movw    hl, ax
-       movw    ax, #0
+       clrw    ax
        subw    ax, bc
        movw    bc, ax
-       movw    ax, #0
+       clrw    ax
        sknc
        decw    ax
        subw    ax, hl
@@ -533,7 +533,7 @@
        bnz     $1f
        movw    ax, bc
        cmpw    ax, #0
-       movw    ax, #0
+       clrw    ax
        bnz     $1f
 
        ;; Return 0.0
Index: libgcc/config/rl78/fpmath-sf.S
===================================================================
--- libgcc/config/rl78/fpmath-sf.S      (revision 3174)
+++ libgcc/config/rl78/fpmath-sf.S      (working copy)
@@ -87,7 +87,7 @@
        or      a, #0x80
        mov     A_FRAC_H, a
 
-       mov     a, #0
+       clrb    a
        mov     A_FRAC_HH, a
 
        ;; rounding-bit-shift
@@ -273,7 +273,7 @@
        ;; "zero out" b
        movw    ax, A_EXP
        movw    B_EXP, ax
-       movw    ax, #0
+       clrw    ax
        movw    B_FRAC_L, ax
        movw    B_FRAC_H, ax
        br      $5f
@@ -281,7 +281,7 @@
        ;; "zero out" a
        movw    ax, B_EXP
        movw    A_EXP, ax
-       movw    ax, #0
+       clrw    ax
        movw    A_FRAC_L, ax
        movw    A_FRAC_H, ax
 
@@ -379,7 +379,7 @@
        bt      a.7, $.L706
        
        ;; subtraction was positive
-       mov     a, #0
+       clrb    a
        mov     A_SIGN, a
        br      $.L712
 
@@ -543,7 +543,7 @@
        or      a, A_FRAC_H
        or      a, A_FRAC_HH
        bnz     $1f
-       movw    ax, #0
+       clrw    ax
        movw    A_EXP, ax
 1:     
        mov     a, A_FRAC_H
@@ -682,7 +682,7 @@
        movw    ax, B_FRAC_H
        movw    [sp+10], ax
 
-       movw    ax, #0
+       clrw    ax
        movw    [sp+4], ax
        movw    [sp+6], ax
        movw    [sp+12], ax
@@ -867,7 +867,7 @@
        and     a, #0x80
        mov     r11, a
        movw    r8, #0
-       mov     r10, #0
+       clrb    r10
        ret
        
 1:     
@@ -930,7 +930,7 @@
        movw    ax, B_FRAC_H
        movw    [sp+10], ax
 
-       movw    ax, #0
+       clrw    ax
        movw    [sp+0], ax
        movw    [sp+2], ax
        movw    [sp+12], ax
Index: libgcc/config/rl78/mulsi3.S
===================================================================
--- libgcc/config/rl78/mulsi3.S (revision 3174)
+++ libgcc/config/rl78/mulsi3.S (working copy)
@@ -148,7 +148,7 @@
        movw    ax, bc
 
 .Lmul_hisi_top:
-       movw    bc, #0
+       clrw    bc
 
 .Lmul_hisi_loop:
        shrw    ax, 1

rl78_libgcc_optimize_draft.patch
Description: rl78_libgcc_optimize_draft.patch

rl78_libgcc_optimize_clrw.patch
Description: rl78_libgcc_optimize_clrw.patch

[PATCH: RL78] Optimize libgcc routines using clrw and clrb

Reply via email to