This patch is mainly to fix a regression on trunk with support for
__ctzsi2 on cores that do not have the CLZ instruction (notably pre-v5
and cortex-m0).  It's needed after a recent change to longlong.h, which
now defines count_trailing_zeros to use the GCC builtin.  Unfortunately,
that builtin is used in the default libgcc definition of __ctzsi2, and
when GCC has no instruction sequence to implement the builtin it falls
back to calling the __ctzsi2 helper function, so the default definition
ends up calling itself...

The patch addresses the problem by providing suitable assembly-language
definitions that break the recursive loop.  They're also marginally
optimized over the alternative of calling __clzsi2 to provide the
implementation.

Tested on bare metal for both thumb1 and pre-v5 ARM and committed to trunk.

No need for additional tests, this fixes FAILs on the FFS2 tests.

        * arm/lib1funcs.S (ctzsi2): New function.
        * arm/t-elf (LIB1ASMFUNCS): Add _ctzsi2.
        * arm/t-linux (LIB1ASMFUNCS): Likewise.
        * arm/t-strongarm-elf (LIB1ASMFUNCS): Likewise.
        * arm/t-symbian (LIB1ASMFUNCS): Likewise.
        * arm/t-vxworks (LIB1ASMFUNCS): Likewise.
        * arm/t-wince-pe (LIB1ASMFUNCS): Likewise.

R.
--- config/arm/lib1funcs.S      (revision 185620)
+++ config/arm/lib1funcs.S      (local)
@@ -1594,6 +1594,70 @@ ARM_FUNC_START clzdi2
 #endif
 #endif /* L_clzdi2 */
 
+#ifdef L_ctzsi2 /* ctzsi2: r0 = bit index of the lowest set bit of r0.  */
+#if defined(__ARM_ARCH_6M__) /* Variant using branches and a lookup table.  */
+FUNC_START ctzsi2
+       neg     r1, r0 /* r1 = -r0 */
+       and     r0, r0, r1 /* r0 & -r0: only the lowest set bit remains */
+       mov     r1, #28 /* r1 = bias; each shift taken below reduces it */
+       mov     r3, #1
+       lsl     r3, r3, #16 /* r3 = 0x10000, the first threshold */
+       cmp     r0, r3 /* 0x10000 */
+       bcc     2f /* r0 < 0x10000 (unsigned): skip the 16-bit shift */
+       lsr     r0, r0, #16
+       sub     r1, r1, #16
+2:     lsr     r3, r3, #8 /* r3 = 0x100 */
+       cmp     r0, r3 /* #0x100 */
+       bcc     2f /* r0 < 0x100: skip the 8-bit shift */
+       lsr     r0, r0, #8
+       sub     r1, r1, #8
+2:     lsr     r3, r3, #4 /* r3 = 0x10 */
+       cmp     r0, r3 /* #0x10 */
+       bcc     2f /* r0 < 0x10: skip the 4-bit shift */
+       lsr     r0, r0, #4
+       sub     r1, r1, #4
+2:     adr     r2, 1f /* r2 = address of the byte table at 1: below */
+       ldrb    r0, [r2, r0] /* r0 is below 0x10 here and indexes the table */
+       sub     r0, r0, r1 /* result = entry - bias; input 0 gives 27 - 28 = -1 */
+       bx lr
+.align 2
+1: /* Entry i holds 27 + the number of significant bits in i.  */
+.byte  27, 28, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31
+       FUNC_END ctzsi2
+#else /* !__ARM_ARCH_6M__ */
+ARM_FUNC_START ctzsi2
+       rsb     r1, r0, #0 /* r1 = -r0 */
+       and     r0, r0, r1 /* r0 & -r0: only the lowest set bit remains */
+# if defined(HAVE_ARM_CLZ) /* Use CLZ directly when it is available.  */
+       clz     r0, r0
+       rsb     r0, r0, #31 /* result = 31 - clz (isolated bit) */
+       RET
+# else /* No CLZ: conditional shifts plus a lookup table.  */
+       mov     r1, #28 /* r1 = bias; each shift taken below reduces it */
+       cmp     r0, #0x10000
+       do_it   cs, t
+       movcs   r0, r0, lsr #16 /* if r0 >= 0x10000 (unsigned): r0 >>= 16 */
+       subcs   r1, r1, #16 /* ... and bias -= 16 */
+       cmp     r0, #0x100
+       do_it   cs, t
+       movcs   r0, r0, lsr #8 /* if r0 >= 0x100: r0 >>= 8 */
+       subcs   r1, r1, #8 /* ... and bias -= 8 */
+       cmp     r0, #0x10
+       do_it   cs, t
+       movcs   r0, r0, lsr #4 /* if r0 >= 0x10: r0 >>= 4 */
+       subcs   r1, r1, #4 /* ... and bias -= 4 */
+       adr     r2, 1f /* r2 = address of the byte table at 1: below */
+       ldrb    r0, [r2, r0] /* r0 is below 0x10 here and indexes the table */
+       sub     r0, r0, r1 /* result = entry - bias; input 0 gives 27 - 28 = -1 */
+       RET
+.align 2
+1: /* Entry i holds 27 + the number of significant bits in i.  */
+.byte  27, 28, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31
+# endif /* !HAVE_ARM_CLZ */
+       FUNC_END ctzsi2
+#endif /* __ARM_ARCH_6M__ */
+#endif /* L_ctzsi2 */
+
 /* ------------------------------------------------------------------------ */
 /* These next two sections are here despite the fact that they contain Thumb 
    assembler because their presence allows interworked code to be linked even
--- config/arm/t-elf    (revision 185620)
+++ config/arm/t-elf    (local)
@@ -10,7 +10,7 @@ LIB1ASMFUNCS += _udivsi3 _divsi3 _umodsi
        _arm_truncdfsf2 _arm_negsf2 _arm_addsubsf3 _arm_muldivsf3 \
        _arm_cmpsf2 _arm_unordsf2 _arm_fixsfsi _arm_fixunssfsi \
        _arm_floatdidf _arm_floatdisf _arm_floatundidf _arm_floatundisf \
-       _clzsi2 _clzdi2 
+       _clzsi2 _clzdi2 _ctzsi2
 
 # Currently there is a bug somewhere in GCC's alias analysis
 # or scheduling code that is breaking _fpmul_parts in fp-bit.c.
--- config/arm/t-linux  (revision 185620)
+++ config/arm/t-linux  (local)
@@ -1,6 +1,6 @@
 LIB1ASMSRC = arm/lib1funcs.S
 LIB1ASMFUNCS = _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_lnx _clzsi2 _clzdi2 \
-       _arm_addsubdf3 _arm_addsubsf3
+       _ctzsi2 _arm_addsubdf3 _arm_addsubsf3
 
 # Just for these, we omit the frame pointer since it makes such a big
 # difference.
--- config/arm/t-strongarm-elf  (revision 185620)
+++ config/arm/t-strongarm-elf  (local)
@@ -1,4 +1,4 @@
-LIB1ASMFUNCS += _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _bb_init_func 
_clzsi2 _clzdi2
+LIB1ASMFUNCS += _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _bb_init_func 
_clzsi2 _clzdi2 _ctzsi2
 
 # Currently there is a bug somewhere in GCC's alias analysis
 # or scheduling code that is breaking _fpmul_parts in fp-bit.c.
--- config/arm/t-symbian        (revision 185620)
+++ config/arm/t-symbian        (local)
@@ -1,4 +1,4 @@
-LIB1ASMFUNCS += _bb_init_func _call_via_rX _interwork_call_via_rX _clzsi2 
_clzdi2
+LIB1ASMFUNCS += _bb_init_func _call_via_rX _interwork_call_via_rX _clzsi2 
_clzdi2 _ctzsi2
 
 # These functions have __aeabi equivalents and will never be called by GCC.  
 # By putting them in LIB1ASMFUNCS, we avoid the standard libgcc2.c code being
--- config/arm/t-vxworks        (revision 185620)
+++ config/arm/t-vxworks        (local)
@@ -1 +1 @@
-LIB1ASMFUNCS += _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _bb_init_func 
_call_via_rX _interwork_call_via_rX _clzsi2 _clzdi2
+LIB1ASMFUNCS += _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _bb_init_func 
_call_via_rX _interwork_call_via_rX _clzsi2 _clzdi2 _ctzsi2
--- config/arm/t-wince-pe       (revision 185620)
+++ config/arm/t-wince-pe       (local)
@@ -1 +1 @@
-LIB1ASMFUNCS += _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _call_via_rX 
_interwork_call_via_rX _clzsi2 _clzdi2
+LIB1ASMFUNCS += _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _call_via_rX 
_interwork_call_via_rX _clzsi2 _clzdi2 _ctzsi2

Reply via email to