This patch is mainly to fix a regression on trunk with support for
__ctzsi2 on cores that do not have the CLZ instruction (notably pre-v5
and cortex-m0). It's needed after a recent change to longlong.h which
now defines count_trailing_zeros to use the GCC builtin: unfortunately
that builtin is used in the default libgcc definition of __ctzsi2 and
gcc furthermore falls back, when it has no sequence to implement that
builtin, to calling the __ctzsi2 helper function...
The patch addresses the problem by providing suitable assembly-language
definitions that break the recursive loop. They're also marginally
optimized over the alternative of calling __clzsi2 to provide the
implementation.
Tested on bare metal for both thumb1 and pre-v5 ARM and committed to trunk.
No additional tests are needed: this fixes FAILs on the FFS2 tests.
* arm/lib1funcs.S (ctzsi2): New function.
* arm/t-elf (LIB1ASMFUNCS): Add _ctzsi2.
* arm/t-linux (LIB1ASMFUNCS): Likewise.
* arm/t-strongarm-elf (LIB1ASMFUNCS): Likewise.
* arm/t-symbian (LIB1ASMFUNCS): Likewise.
* arm/t-vxworks (LIB1ASMFUNCS): Likewise.
* arm/t-wince-pe (LIB1ASMFUNCS): Likewise.
R.
--- config/arm/lib1funcs.S (revision 185620)
+++ config/arm/lib1funcs.S (local)
@@ -1594,6 +1594,70 @@ ARM_FUNC_START clzdi2
#endif
#endif /* L_clzdi2 */
+#ifdef L_ctzsi2 /* ctzsi2: count trailing zeros of r0; result is left in r0.  */
+#if defined(__ARM_ARCH_6M__) /* ARMv6-M variant: branches around each narrowing step.  */
+FUNC_START ctzsi2
+ neg r1, r0 /* r1 = -r0 */
+ and r0, r0, r1 /* r0 & -r0 keeps only the lowest set bit (0 stays 0) */
+ mov r1, #28 /* r1 = bias; the result is table[r0] - r1 */
+ mov r3, #1
+ lsl r3, r3, #16 /* r3 = 0x10000 */
+ cmp r0, r3 /* r0 >= 0x10000?  */
+ bcc 2f /* unsigned lower: bit is in the low 16 bits, skip */
+ lsr r0, r0, #16 /* move the bit down 16 places ... */
+ sub r1, r1, #16 /* ... and credit 16 to the result */
+2: lsr r3, r3, #8 /* r3 = 0x100 */
+ cmp r0, r3 /* r0 >= 0x100?  */
+ bcc 2f /* unsigned lower: bit is in the low 8 bits, skip */
+ lsr r0, r0, #8 /* move the bit down 8 places ... */
+ sub r1, r1, #8 /* ... and credit 8 to the result */
+2: lsr r3, r3, #4 /* r3 = 0x10 */
+ cmp r0, r3 /* r0 >= 0x10?  */
+ bcc 2f /* unsigned lower: bit is in the low 4 bits, skip */
+ lsr r0, r0, #4 /* move the bit down 4 places ... */
+ sub r1, r1, #4 /* ... and credit 4 to the result */
+2: adr r2, 1f /* r2 = address of the 16-entry byte table below */
+ ldrb r0, [r2, r0] /* r0 = table[r0], with r0 now in 0..15 */
+ sub r0, r0, r1 /* r0 = table[r0] - bias; input 0 gives 27 - 28 = -1 */
+ bx lr
+.align 2
+1: /* table[i] = 27 + bit length of i, for i in 0..15 */
+.byte 27, 28, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31
+ FUNC_END ctzsi2
+#else /* Not ARMv6-M: conditional instructions replace the branches.  */
+ARM_FUNC_START ctzsi2
+ rsb r1, r0, #0 /* r1 = -r0 */
+ and r0, r0, r1 /* r0 & -r0 keeps only the lowest set bit (0 stays 0) */
+# if defined(HAVE_ARM_CLZ)
+ clz r0, r0 /* count the leading zeros above the isolated bit */
+ rsb r0, r0, #31 /* r0 = 31 - clz = index of that bit */
+ RET
+# else /* No CLZ: narrow by 16, 8 and 4 bits, then look up the last nibble.  */
+ mov r1, #28 /* r1 = bias; the result is table[r0] - r1 */
+ cmp r0, #0x10000
+ do_it cs, t /* do_it is defined outside this hunk; presumably an IT block for Thumb-2 -- confirm */
+ movcs r0, r0, lsr #16 /* if r0 >= 0x10000: move the bit down 16 places ... */
+ subcs r1, r1, #16 /* ... and credit 16 to the result */
+ cmp r0, #0x100
+ do_it cs, t
+ movcs r0, r0, lsr #8 /* if r0 >= 0x100: move the bit down 8 places ... */
+ subcs r1, r1, #8 /* ... and credit 8 to the result */
+ cmp r0, #0x10
+ do_it cs, t
+ movcs r0, r0, lsr #4 /* if r0 >= 0x10: move the bit down 4 places ... */
+ subcs r1, r1, #4 /* ... and credit 4 to the result */
+ adr r2, 1f /* r2 = address of the 16-entry byte table below */
+ ldrb r0, [r2, r0] /* r0 = table[r0], with r0 now in 0..15 */
+ sub r0, r0, r1 /* r0 = table[r0] - bias; input 0 gives 27 - 28 = -1 */
+ RET
+.align 2
+1: /* table[i] = 27 + bit length of i, for i in 0..15 */
+.byte 27, 28, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31
+# endif /* !HAVE_ARM_CLZ */
+ FUNC_END ctzsi2
+#endif /* __ARM_ARCH_6M__ */
+#endif /* L_ctzsi2 */
+
/* ------------------------------------------------------------------------ */
/* These next two sections are here despite the fact that they contain Thumb
assembler because their presence allows interworked code to be linked even
--- config/arm/t-elf (revision 185620)
+++ config/arm/t-elf (local)
@@ -10,7 +10,7 @@ LIB1ASMFUNCS += _udivsi3 _divsi3 _umodsi
_arm_truncdfsf2 _arm_negsf2 _arm_addsubsf3 _arm_muldivsf3 \
_arm_cmpsf2 _arm_unordsf2 _arm_fixsfsi _arm_fixunssfsi \
_arm_floatdidf _arm_floatdisf _arm_floatundidf _arm_floatundisf \
- _clzsi2 _clzdi2
+ _clzsi2 _clzdi2 _ctzsi2
# Currently there is a bug somewhere in GCC's alias analysis
# or scheduling code that is breaking _fpmul_parts in fp-bit.c.
--- config/arm/t-linux (revision 185620)
+++ config/arm/t-linux (local)
@@ -1,6 +1,6 @@
LIB1ASMSRC = arm/lib1funcs.S
LIB1ASMFUNCS = _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_lnx _clzsi2 _clzdi2 \
- _arm_addsubdf3 _arm_addsubsf3
+ _ctzsi2 _arm_addsubdf3 _arm_addsubsf3
# Just for these, we omit the frame pointer since it makes such a big
# difference.
--- config/arm/t-strongarm-elf (revision 185620)
+++ config/arm/t-strongarm-elf (local)
@@ -1,4 +1,4 @@
-LIB1ASMFUNCS += _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _bb_init_func _clzsi2 _clzdi2
+LIB1ASMFUNCS += _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _bb_init_func _clzsi2 _clzdi2 _ctzsi2
# Currently there is a bug somewhere in GCC's alias analysis
# or scheduling code that is breaking _fpmul_parts in fp-bit.c.
--- config/arm/t-symbian (revision 185620)
+++ config/arm/t-symbian (local)
@@ -1,4 +1,4 @@
-LIB1ASMFUNCS += _bb_init_func _call_via_rX _interwork_call_via_rX _clzsi2 _clzdi2
+LIB1ASMFUNCS += _bb_init_func _call_via_rX _interwork_call_via_rX _clzsi2 _clzdi2 _ctzsi2
# These functions have __aeabi equivalents and will never be called by GCC.
# By putting them in LIB1ASMFUNCS, we avoid the standard libgcc2.c code being
--- config/arm/t-vxworks (revision 185620)
+++ config/arm/t-vxworks (local)
@@ -1 +1 @@
-LIB1ASMFUNCS += _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _bb_init_func _call_via_rX _interwork_call_via_rX _clzsi2 _clzdi2
+LIB1ASMFUNCS += _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _bb_init_func _call_via_rX _interwork_call_via_rX _clzsi2 _clzdi2 _ctzsi2
--- config/arm/t-wince-pe (revision 185620)
+++ config/arm/t-wince-pe (local)
@@ -1 +1 @@
-LIB1ASMFUNCS += _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _call_via_rX _interwork_call_via_rX _clzsi2 _clzdi2
+LIB1ASMFUNCS += _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _call_via_rX _interwork_call_via_rX _clzsi2 _clzdi2 _ctzsi2