These are 2-5 instructions smaller and just as fast. Branches are minimized, which will allow easier adaptation to Thumb-2/ARM mode.
gcc/libgcc/ChangeLog: 2022-10-09 Daniel Engel <g...@danielengel.com> * config/arm/eabi/lcmp.S (__aeabi_lcmp, __aeabi_ulcmp): Replaced; add macro configuration to build __cmpdi2() and __ucmpdi2(). * config/arm/t-elf (LIB1ASMFUNCS): Added _cmpdi2 and _ucmpdi2. --- libgcc/config/arm/eabi/lcmp.S | 151 +++++++++++++++++++++++++--------- libgcc/config/arm/t-elf | 2 + 2 files changed, 112 insertions(+), 41 deletions(-) diff --git a/libgcc/config/arm/eabi/lcmp.S b/libgcc/config/arm/eabi/lcmp.S index 336db1d398c..99c7970ecba 100644 --- a/libgcc/config/arm/eabi/lcmp.S +++ b/libgcc/config/arm/eabi/lcmp.S @@ -1,8 +1,7 @@ -/* Miscellaneous BPABI functions. Thumb-1 implementation, suitable for ARMv4T, - ARMv6-M and ARMv8-M Baseline like ISA variants. +/* lcmp.S: Thumb-1 optimized 64-bit integer comparison - Copyright (C) 2006-2020 Free Software Foundation, Inc. - Contributed by CodeSourcery. + Copyright (C) 2018-2022 Free Software Foundation, Inc. + Contributed by Daniel Engel, Senva Inc (g...@danielengel.com) This file is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the @@ -24,50 +23,120 @@ <http://www.gnu.org/licenses/>. */ +#if defined(L_aeabi_lcmp) || defined(L_cmpdi2) + #ifdef L_aeabi_lcmp + #define LCMP_NAME aeabi_lcmp + #define LCMP_SECTION .text.sorted.libgcc.lcmp +#else + #define LCMP_NAME cmpdi2 + #define LCMP_SECTION .text.sorted.libgcc.cmpdi2 +#endif + +// int __aeabi_lcmp(long long, long long) +// int __cmpdi2(long long, long long) +// Compares the 64 bit signed values in $r1:$r0 and $r3:$r2. +// lcmp() returns $r0 = { -1, 0, +1 } for orderings { <, ==, > } respectively. +// cmpdi2() returns $r0 = { 0, 1, 2 } for orderings { <, ==, > } respectively. +// Object file duplication assumes typical programs follow one runtime ABI. +FUNC_START_SECTION LCMP_NAME LCMP_SECTION + CFI_START_FUNCTION + + // Calculate the difference $r1:$r0 - $r3:$r2. 
+ subs xxl, yyl + sbcs xxh, yyh + + // With $r2 free, create a known offset value without affecting + // the N or Z flags. + // BUG? The originally unified instruction for v6m was 'mov r2, r3'. + // However, this resulted in a compile error with -mthumb: + // "MOV Rd, Rs with two low registers not permitted". + // Since unified syntax deprecates the "cpy" instruction, shouldn't + // there be a backwards-compatible translation available? + cpy r2, r3 + + // Evaluate the comparison result. + blt LLSYM(__lcmp_lt) + + // The reference offset ($r2 - $r3) will be +2 iff the first + // argument is larger or equal, otherwise the offset value remains 0. + adds r2, #2 + + // Check for zero (equality in 64 bits). + // It doesn't matter which register was originally "hi". + orrs r0, r1 + + // The result is already 0 on equality. + beq LLSYM(__lcmp_return) + + LLSYM(__lcmp_lt): + // Create +1 or -1 from the offset value defined earlier. + adds r3, #1 + subs r0, r2, r3 + + LLSYM(__lcmp_return): + #ifdef L_cmpdi2 + // Offset to the correct output specification. + adds r0, #1 + #endif -FUNC_START aeabi_lcmp - cmp xxh, yyh - beq 1f - bgt 2f - movs r0, #1 - negs r0, r0 - RET -2: - movs r0, #1 - RET -1: - subs r0, xxl, yyl - beq 1f - bhi 2f - movs r0, #1 - negs r0, r0 - RET -2: - movs r0, #1 -1: RET - FUNC_END aeabi_lcmp -#endif /* L_aeabi_lcmp */ + CFI_END_FUNCTION +FUNC_END LCMP_NAME + +#endif /* L_aeabi_lcmp || L_cmpdi2 */ + + +#if defined(L_aeabi_ulcmp) || defined(L_ucmpdi2) #ifdef L_aeabi_ulcmp + #define ULCMP_NAME aeabi_ulcmp + #define ULCMP_SECTION .text.sorted.libgcc.ulcmp +#else + #define ULCMP_NAME ucmpdi2 + #define ULCMP_SECTION .text.sorted.libgcc.ucmpdi2 +#endif + +// int __aeabi_ulcmp(unsigned long long, unsigned long long) +// int __ucmpdi2(unsigned long long, unsigned long long) +// Compares the 64 bit unsigned values in $r1:$r0 and $r3:$r2. +// ulcmp() returns $r0 = { -1, 0, +1 } for orderings { <, ==, > } respectively. 
+// ucmpdi2() returns $r0 = { 0, 1, 2 } for orderings { <, ==, > } respectively. +// Object file duplication assumes typical programs follow one runtime ABI. +FUNC_START_SECTION ULCMP_NAME ULCMP_SECTION + CFI_START_FUNCTION + + // Calculate the 'C' flag. + subs xxl, yyl + sbcs xxh, yyh + + // Capture the carry flag. + // $r2 will contain -1 if the first value is smaller, + // 0 if the first value is larger or equal. + sbcs r2, r2 + + // Check for zero (equality in 64 bits). + // It doesn't matter which register was originally "hi". + orrs r0, r1 + + // The result is already 0 on equality. + beq LLSYM(__ulcmp_return) + + // Assume +1. If -1 is correct, $r2 will override. + movs r0, #1 + orrs r0, r2 + + LLSYM(__ulcmp_return): + #ifdef L_ucmpdi2 + // Offset to the correct output specification. + adds r0, #1 + #endif -FUNC_START aeabi_ulcmp - cmp xxh, yyh - bne 1f - subs r0, xxl, yyl - beq 2f -1: - bcs 1f - movs r0, #1 - negs r0, r0 - RET -1: - movs r0, #1 -2: RET - FUNC_END aeabi_ulcmp -#endif /* L_aeabi_ulcmp */ + CFI_END_FUNCTION +FUNC_END ULCMP_NAME + +#endif /* L_aeabi_ulcmp || L_ucmpdi2 */ diff --git a/libgcc/config/arm/t-elf b/libgcc/config/arm/t-elf index 2e3f04aa2f0..83325410097 100644 --- a/libgcc/config/arm/t-elf +++ b/libgcc/config/arm/t-elf @@ -41,6 +41,8 @@ LIB1ASMFUNCS += \ _ffsdi2 \ _paritydi2 \ _popcountdi2 \ + _cmpdi2 \ + _ucmpdi2 \ _dvmd_tls \ _divsi3 \ _modsi3 \ -- 2.34.1