Ping!

On 30/04/15 10:40, Hale Wang wrote:
-----Original Message-----
From: Hale Wang [mailto:hale.w...@arm.com]
Sent: Monday, February 09, 2015 9:54 AM
To: Richard Earnshaw
Cc: Hale Wang; gcc-patches; Matthew Gretton-Dann
Subject: RE: [Ping^2] [PATCH, ARM, libgcc] New aeabi_idiv function for
armv6-m

Ping https://gcc.gnu.org/ml/gcc-patches/2014-12/msg01059.html.


Ping for trunk. Is it ok for trunk now?

Thanks,
Hale
-----Original Message-----
From: gcc-patches-ow...@gcc.gnu.org [mailto:gcc-patches-
ow...@gcc.gnu.org] On Behalf Of Hale Wang
Sent: Friday, December 12, 2014 9:36 AM
To: gcc-patches
Subject: RE: [Ping] [PATCH, ARM, libgcc] New aeabi_idiv function for
armv6- m

Ping? Already applied to arm/embedded-4_9-branch, is it OK for trunk?

-Hale

-----Original Message-----
From: Joey Ye [mailto:joey.ye...@gmail.com]
Sent: Thursday, November 27, 2014 10:01 AM
To: Hale Wang
Cc: gcc-patches
Subject: Re: [PATCH, ARM, libgcc] New aeabi_idiv function for
armv6-m

OK applying to arm/embedded-4_9-branch, though you still need
maintainer approval into trunk.

- Joey

On Wed, Nov 26, 2014 at 11:43 AM, Hale Wang <hale.w...@arm.com>
wrote:
Hi,

This patch ports the aeabi_idiv routine from Linaro Cortex-Strings
(https://git.linaro.org/toolchain/cortex-strings.git), which was
contributed by ARM under Free BSD license.

The new aeabi_idiv routine is used to replace the one in
libgcc/config/arm/lib1funcs.S. This replacement happens within the
Thumb1 wrapper. The new routine is under LGPLv3 license.

The main advantage of this version is that it can improve the
performance of the aeabi_idiv function for Thumb1. This solution
will also increase the code size. So it will only be used if
__OPTIMIZE_SIZE__ is
not defined.

Make check passed for armv6-m.

OK for trunk?

Thanks,
Hale Wang

libgcc/ChangeLog:

2014-11-26  Hale Wang  <hale.w...@arm.com>

         * config/arm/lib1funcs.S: Add new wrapper.

===============================================
diff --git a/libgcc/config/arm/lib1funcs.S
b/libgcc/config/arm/lib1funcs.S index b617137..de66c81 100644
--- a/libgcc/config/arm/lib1funcs.S
+++ b/libgcc/config/arm/lib1funcs.S
@@ -306,34 +306,12 @@ LSYM(Lend_fde):
  #ifdef __ARM_EABI__
  .macro THUMB_LDIV0 name signed
  #if defined(__ARM_ARCH_6M__)
-       .ifc \signed, unsigned
-       cmp     r0, #0
-       beq     1f
-       mov     r0, #0
-       mvn     r0, r0          @ 0xffffffff
-1:
-       .else
-       cmp     r0, #0
-       beq     2f
-       blt     3f
+
+       push    {r0, lr}
         mov     r0, #0
-       mvn     r0, r0
-       lsr     r0, r0, #1      @ 0x7fffffff
-       b       2f
-3:     mov     r0, #0x80
-       lsl     r0, r0, #24     @ 0x80000000
-2:
-       .endif
-       push    {r0, r1, r2}
-       ldr     r0, 4f
-       adr     r1, 4f
-       add     r0, r1
-       str     r0, [sp, #8]
-       @ We know we are not on armv4t, so pop pc is safe.
-       pop     {r0, r1, pc}
-       .align  2
-4:
-       .word   __aeabi_idiv0 - 4b
+       bl      SYM(__aeabi_idiv0)
+       pop     {r1, pc}
+
  #elif defined(__thumb2__)
         .syntax unified
         .ifc \signed, unsigned
@@ -927,7 +905,158 @@ LSYM(Lover7):
         add     dividend, work
    .endif
  LSYM(Lgot_result):
-.endm
+.endm
+
+#if defined(__prefer_thumb__)
&& !defined(__OPTIMIZE_SIZE__) .macro
+BranchToDiv n, label
+       lsr     curbit, dividend, \n
+       cmp     curbit, divisor
+       blo     \label
+.endm
+
+.macro DoDiv n
+       lsr     curbit, dividend, \n
+       cmp     curbit, divisor
+       bcc     1f
+       lsl     curbit, divisor, \n
+       sub     dividend, dividend, curbit
+
+1:     adc     result, result
+.endm
+
+.macro THUMB1_Div_Positive
+       mov     result, #0
+       BranchToDiv #1, LSYM(Lthumb1_div1)
+       BranchToDiv #4, LSYM(Lthumb1_div4)
+       BranchToDiv #8, LSYM(Lthumb1_div8)
+       BranchToDiv #12, LSYM(Lthumb1_div12)
+       BranchToDiv #16, LSYM(Lthumb1_div16)
+LSYM(Lthumb1_div_large_positive):
+       mov     result, #0xff
+       lsl     divisor, divisor, #8
+       rev     result, result
+       lsr     curbit, dividend, #16
+       cmp     curbit, divisor
+       blo     1f
+       asr     result, #8
+       lsl     divisor, divisor, #8
+       beq     LSYM(Ldivbyzero_waypoint)
+
+1:     lsr     curbit, dividend, #12
+       cmp     curbit, divisor
+       blo     LSYM(Lthumb1_div12)
+       b       LSYM(Lthumb1_div16)
+LSYM(Lthumb1_div_loop):
+       lsr     divisor, divisor, #8
+LSYM(Lthumb1_div16):
+       Dodiv   #15
+       Dodiv   #14
+       Dodiv   #13
+       Dodiv   #12
+LSYM(Lthumb1_div12):
+       Dodiv   #11
+       Dodiv   #10
+       Dodiv   #9
+       Dodiv   #8
+       bcs     LSYM(Lthumb1_div_loop)
+LSYM(Lthumb1_div8):
+       Dodiv   #7
+       Dodiv   #6
+       Dodiv   #5
+LSYM(Lthumb1_div5):
+       Dodiv   #4
+LSYM(Lthumb1_div4):
+       Dodiv   #3
+LSYM(Lthumb1_div3):
+       Dodiv   #2
+LSYM(Lthumb1_div2):
+       Dodiv   #1
+LSYM(Lthumb1_div1):
+       sub     divisor, dividend, divisor
+       bcs     1f
+       cpy     divisor, dividend
+
+1:     adc     result, result
+       cpy     dividend, result
+       RET
+
+LSYM(Ldivbyzero_waypoint):
+       b       LSYM(Ldiv0)
+.endm
+
+.macro THUMB1_Div_Negative
+       lsr     result, divisor, #31
+       beq     1f
+       neg     divisor, divisor
+
+1:     asr     curbit, dividend, #32
+       bcc     2f
+       neg     dividend, dividend
+
+2:     eor     curbit, result
+       mov     result, #0
+       cpy     ip, curbit
+       BranchToDiv #4, LSYM(Lthumb1_div_negative4)
+       BranchToDiv #8, LSYM(Lthumb1_div_negative8)
+LSYM(Lthumb1_div_large):
+       mov     result, #0xfc
+       lsl     divisor, divisor, #6
+       rev     result, result
+       lsr     curbit, dividend, #8
+       cmp     curbit, divisor
+       blo     LSYM(Lthumb1_div_negative8)
+
+       lsl     divisor, divisor, #6
+       asr     result, result, #6
+       cmp     curbit, divisor
+       blo     LSYM(Lthumb1_div_negative8)
+
+       lsl     divisor, divisor, #6
+       asr     result, result, #6
+       cmp     curbit, divisor
+       blo     LSYM(Lthumb1_div_negative8)
+
+       lsl     divisor, divisor, #6
+       beq     LSYM(Ldivbyzero_negative)
+       asr     result, result, #6
+       b       LSYM(Lthumb1_div_negative8)
+LSYM(Lthumb1_div_negative_loop):
+       lsr     divisor, divisor, #6
+LSYM(Lthumb1_div_negative8):
+       DoDiv   #7
+       DoDiv   #6
+       DoDiv   #5
+       DoDiv   #4
+LSYM(Lthumb1_div_negative4):
+       DoDiv   #3
+       DoDiv   #2
+       bcs     LSYM(Lthumb1_div_negative_loop)
+       DoDiv   #1
+       sub     divisor, dividend, divisor
+       bcs     1f
+       cpy     divisor, dividend
+
+1:     cpy     curbit, ip
+       adc     result, result
+       asr     curbit, curbit, #1
+       cpy     dividend, result
+       bcc     2f
+       neg     dividend, dividend
+       cmp     curbit, #0
+
+2:     bpl     3f
+       neg     divisor, divisor
+
+3:     RET
+
+LSYM(Ldivbyzero_negative):
+       cpy     curbit, ip
+       asr     curbit, curbit, #1
+       bcc     LSYM(Ldiv0)
+       neg     dividend, dividend
+.endm
+#endif /* ARM Thumb version.  */
+
  /*
------------------------------------------------------------------
--
--
--
*/
  /*             Start of the Real Functions
*/
  /*
------------------------------------------------------------------
--
--
--
*/
@@ -937,6 +1066,7 @@ LSYM(Lgot_result):

         FUNC_START udivsi3
         FUNC_ALIAS aeabi_uidiv udivsi3
+#if defined(__OPTIMIZE_SIZE__)

         cmp     divisor, #0
         beq     LSYM(Ldiv0)
@@ -954,6 +1084,13 @@ LSYM(udivsi3_skip_div0_test):
         pop     { work }
         RET

+#else
+
+LSYM(udivsi3_skip_div0_test):
+       THUMB1_Div_Positive
+
+#endif
+
  #elif defined(__ARM_ARCH_EXT_IDIV__)

         ARM_FUNC_START udivsi3
@@ -1066,7 +1203,7 @@ LSYM(Lover10):
         RET

  #else  /* ARM version.  */
-
+
         FUNC_START umodsi3

         subs    r2, r1, #1                      @ compare divisor with 1
@@ -1091,8 +1228,9 @@ LSYM(Lover10):

  #if defined(__prefer_thumb__)

-       FUNC_START divsi3
+       FUNC_START divsi3
         FUNC_ALIAS aeabi_idiv divsi3
+#if defined(__OPTIMIZE_SIZE__)

         cmp     divisor, #0
         beq     LSYM(Ldiv0)
@@ -1115,7 +1253,7 @@ LSYM(Lover11):
         blo     LSYM(Lgot_result)

         THUMB_DIV_MOD_BODY 0
-
+
         mov     r0, result
         mov     work, ip
         cmp     work, #0
@@ -1124,6 +1262,20 @@ LSYM(Lover11):
  LSYM(Lover12):
         pop     { work }
         RET
+#else
+
+LSYM(divsi3_skip_div0_test):
+       cpy     curbit, dividend
+       orr     curbit, divisor
+       bmi     LSYM(Lthumb1_div_negative)
+
+LSYM(Lthumb1_div_positive):
+       THUMB1_Div_Positive
+
+LSYM(Lthumb1_div_negative):
+       THUMB1_Div_Negative
+
+#endif /* __OPTIMIZE_SIZE__ */

  #elif defined(__ARM_ARCH_EXT_IDIV__)

@@ -1136,8 +1288,8 @@ LSYM(Lover12):
         RET

  #else /* ARM/Thumb-2 version.  */
-
-       ARM_FUNC_START divsi3
+
+       ARM_FUNC_START divsi3
         ARM_FUNC_ALIAS aeabi_idiv divsi3

         cmp     r1, #0
===============================================








Reply via email to