At present, `atomic_16.S' groups the different implementations of each
function together in the file. For example, the LSE2 implementation of
`load_16' immediately follows its core implementation, as does the LSE2
implementation of `store_16'.
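As an illustration, here is a schematic of the current layout and of the
reorganization proposed below (this is not the literal file contents:
function bodies are elided and only two of the affected functions are
shown):

	/* Current layout: ifunc variants interleaved with the core
	   implementations, each variant guarded separately.  */
	ENTRY (load_16)
		...
	END (load_16)
	#if HAVE_FEAT_LSE2
	ENTRY_FEAT (load_16, LSE2)
		...
	END_FEAT (load_16, LSE2)
	#endif
	ENTRY (store_16)
		...
	END (store_16)
	#if HAVE_FEAT_LSE2
	ENTRY_FEAT (store_16, LSE2)
		...
	END_FEAT (store_16, LSE2)
	#endif

	/* Proposed layout: all core implementations first, all ifunc
	   variants after them, under a single guard.  */
	ENTRY (load_16)
		...
	END (load_16)
	ENTRY (store_16)
		...
	END (store_16)
	#if HAVE_IFUNC
	ENTRY_FEAT (load_16, LSE2)
		...
	END_FEAT (load_16, LSE2)
	ENTRY_FEAT (store_16, LSE2)
		...
	END_FEAT (store_16, LSE2)
	#endif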
Such architectural-extension-dependent implementations rely on ifunc
support and are therefore guarded by the relevant preprocessor macro,
i.e. `#if HAVE_IFUNC'. Having to apply these guards on a per-function
basis adds unnecessary clutter to the file and makes its maintenance
more error-prone.

We therefore reorganize the layout of the file so that all core
implementations, which need no `#ifdef's, are placed first, followed by
all ifunc-dependent implementations, which can then be guarded by a
single `#if HAVE_IFUNC', greatly reducing the overall number of
`#ifdef' macros required.

libatomic/ChangeLog:

	* config/linux/aarch64/atomic_16.S: Reorganize functions in file.
	(HAVE_FEAT_LSE2): Delete.
---
 libatomic/config/linux/aarch64/atomic_16.S | 445 +++++++++++----------
 1 file changed, 223 insertions(+), 222 deletions(-)

diff --git a/libatomic/config/linux/aarch64/atomic_16.S b/libatomic/config/linux/aarch64/atomic_16.S
index 11a296dacc3..c44c31c6418 100644
--- a/libatomic/config/linux/aarch64/atomic_16.S
+++ b/libatomic/config/linux/aarch64/atomic_16.S
@@ -40,8 +40,6 @@
 
 #include "auto-config.h"
 
-#define HAVE_FEAT_LSE2	HAVE_IFUNC
-
 	.arch	armv8-a+lse
 
 #define LSE128(NAME)	libat_##NAME##_i1
@@ -116,6 +114,9 @@ NAME:	\
 #define SEQ_CST 5
 
 
+/* Core implementations: Not dependent on the presence of further architectural
+   extensions. */
+
 ENTRY (load_16)
 	mov	x5, x0
 	cbnz	w1, 2f
@@ -134,31 +135,6 @@ ENTRY (load_16)
 END (load_16)
 
 
-#if HAVE_FEAT_LSE2
-ENTRY_FEAT (load_16, LSE2)
-	cbnz	w1, 1f
-
-	/* RELAXED. */
-	ldp	res0, res1, [x0]
-	ret
-1:
-	cmp	w1, SEQ_CST
-	b.eq	2f
-
-	/* ACQUIRE/CONSUME (Load-AcquirePC semantics). */
-	ldp	res0, res1, [x0]
-	dmb	ishld
-	ret
-
-	/* SEQ_CST. */
-2:	ldar	tmp0, [x0]	/* Block reordering with Store-Release instr. */
-	ldp	res0, res1, [x0]
-	dmb	ishld
-	ret
-END_FEAT (load_16, LSE2)
-#endif
-
-
 ENTRY (store_16)
 	cbnz	w4, 2f
 
@@ -176,23 +152,6 @@ ENTRY (store_16)
 END (store_16)
 
 
-#if HAVE_FEAT_LSE2
-ENTRY_FEAT (store_16, LSE2)
-	cbnz	w4, 1f
-
-	/* RELAXED. */
-	stp	in0, in1, [x0]
-	ret
-
-	/* RELEASE/SEQ_CST. */
-1:	ldxp	xzr, tmp0, [x0]
-	stlxp	w4, in0, in1, [x0]
-	cbnz	w4, 1b
-	ret
-END_FEAT (store_16, LSE2)
-#endif
-
-
 ENTRY (exchange_16)
 	mov	x5, x0
 	cbnz	w4, 2f
@@ -220,32 +179,6 @@ ENTRY (exchange_16)
 END (exchange_16)
 
 
-ENTRY_FEAT (exchange_16, LSE128)
-	mov	tmp0, x0
-	mov	res0, in0
-	mov	res1, in1
-	cbnz	w4, 1f
-
-	/* RELAXED. */
-	/* swpp res0, res1, [tmp0] */
-	.inst	0x192180c0
-	ret
-1:
-	cmp	w4, ACQUIRE
-	b.hi	2f
-
-	/* ACQUIRE/CONSUME. */
-	/* swppa res0, res1, [tmp0] */
-	.inst	0x19a180c0
-	ret
-
-	/* RELEASE/ACQ_REL/SEQ_CST. */
-2:	/* swppal res0, res1, [tmp0] */
-	.inst	0x19e180c0
-	ret
-END_FEAT (exchange_16, LSE128)
-
-
 ENTRY (compare_exchange_16)
 	ldp	exp0, exp1, [x1]
 	cbz	w4, 3f
@@ -293,42 +226,6 @@ ENTRY (compare_exchange_16)
 END (compare_exchange_16)
 
 
-#if HAVE_FEAT_LSE2
-ENTRY_FEAT (compare_exchange_16, LSE)
-	ldp	exp0, exp1, [x1]
-	mov	tmp0, exp0
-	mov	tmp1, exp1
-	cbz	w4, 2f
-	cmp	w4, RELEASE
-	b.hs	3f
-
-	/* ACQUIRE/CONSUME. */
-	caspa	exp0, exp1, in0, in1, [x0]
-0:
-	cmp	exp0, tmp0
-	ccmp	exp1, tmp1, 0, eq
-	bne	1f
-	mov	x0, 1
-	ret
-1:
-	stp	exp0, exp1, [x1]
-	mov	x0, 0
-	ret
-
-	/* RELAXED. */
-2:	casp	exp0, exp1, in0, in1, [x0]
-	b	0b
-
-	/* RELEASE. */
-3:	b.hi	4f
-	caspl	exp0, exp1, in0, in1, [x0]
-	b	0b
-
-	/* ACQ_REL/SEQ_CST. */
-4:	caspal	exp0, exp1, in0, in1, [x0]
-	b	0b
-END_FEAT (compare_exchange_16, LSE)
-#endif
 
 
 ENTRY_ALIASED (fetch_add_16)
@@ -441,32 +338,6 @@ ENTRY (fetch_or_16)
 END (fetch_or_16)
 
 
-ENTRY_FEAT (fetch_or_16, LSE128)
-	mov	tmp0, x0
-	mov	res0, in0
-	mov	res1, in1
-	cbnz	w4, 1f
-
-	/* RELAXED. */
-	/* ldsetp res0, res1, [tmp0] */
-	.inst	0x192130c0
-	ret
-1:
-	cmp	w4, ACQUIRE
-	b.hi	2f
-
-	/* ACQUIRE/CONSUME. */
-	/* ldsetpa res0, res1, [tmp0] */
-	.inst	0x19a130c0
-	ret
-
-	/* RELEASE/ACQ_REL/SEQ_CST. */
-2:	/* ldsetpal res0, res1, [tmp0] */
-	.inst	0x19e130c0
-	ret
-END_FEAT (fetch_or_16, LSE128)
-
-
 ENTRY (or_fetch_16)
 	mov	x5, x0
 	cbnz	w4, 2f
@@ -489,37 +360,6 @@ ENTRY (or_fetch_16)
 END (or_fetch_16)
 
 
-ENTRY_FEAT (or_fetch_16, LSE128)
-	cbnz	w4, 1f
-	mov	tmp0, in0
-	mov	tmp1, in1
-
-	/* RELAXED. */
-	/* ldsetp in0, in1, [x0] */
-	.inst	0x19233002
-	orr	res0, in0, tmp0
-	orr	res1, in1, tmp1
-	ret
-1:
-	cmp	w4, ACQUIRE
-	b.hi	2f
-
-	/* ACQUIRE/CONSUME. */
-	/* ldsetpa in0, in1, [x0] */
-	.inst	0x19a33002
-	orr	res0, in0, tmp0
-	orr	res1, in1, tmp1
-	ret
-
-	/* RELEASE/ACQ_REL/SEQ_CST. */
-2:	/* ldsetpal in0, in1, [x0] */
-	.inst	0x19e33002
-	orr	res0, in0, tmp0
-	orr	res1, in1, tmp1
-	ret
-END_FEAT (or_fetch_16, LSE128)
-
-
 ENTRY (fetch_and_16)
 	mov	x5, x0
 	cbnz	w4, 2f
@@ -542,33 +382,6 @@ ENTRY (fetch_and_16)
 END (fetch_and_16)
 
 
-ENTRY_FEAT (fetch_and_16, LSE128)
-	mov	tmp0, x0
-	mvn	res0, in0
-	mvn	res1, in1
-	cbnz	w4, 1f
-
-	/* RELAXED. */
-	/* ldclrp res0, res1, [tmp0] */
-	.inst	0x192110c0
-	ret
-
-1:
-	cmp	w4, ACQUIRE
-	b.hi	2f
-
-	/* ACQUIRE/CONSUME. */
-	/* ldclrpa res0, res1, [tmp0] */
-	.inst	0x19a110c0
-	ret
-
-	/* RELEASE/ACQ_REL/SEQ_CST. */
-2:	/* ldclrpal res0, res1, [tmp0] */
-	.inst	0x19e110c0
-	ret
-END_FEAT (fetch_and_16, LSE128)
-
-
 ENTRY (and_fetch_16)
 	mov	x5, x0
 	cbnz	w4, 2f
@@ -591,38 +404,6 @@ ENTRY (and_fetch_16)
 END (and_fetch_16)
 
 
-ENTRY_FEAT (and_fetch_16, LSE128)
-	mvn	tmp0, in0
-	mvn	tmp0, in1
-	cbnz	w4, 1f
-
-	/* RELAXED. */
-	/* ldclrp tmp0, tmp1, [x0] */
-	.inst	0x19271006
-	and	res0, tmp0, in0
-	and	res1, tmp1, in1
-	ret
-
-1:
-	cmp	w4, ACQUIRE
-	b.hi	2f
-
-	/* ACQUIRE/CONSUME. */
-	/* ldclrpa tmp0, tmp1, [x0] */
-	.inst	0x19a71006
-	and	res0, tmp0, in0
-	and	res1, tmp1, in1
-	ret
-
-	/* RELEASE/ACQ_REL/SEQ_CST. */
-2:	/* ldclrpal tmp0, tmp1, [x5] */
-	.inst	0x19e710a6
-	and	res0, tmp0, in0
-	and	res1, tmp1, in1
-	ret
-END_FEAT (and_fetch_16, LSE128)
-
-
 ENTRY_ALIASED (fetch_xor_16)
 	mov	x5, x0
 	cbnz	w4, 2f
@@ -728,6 +509,226 @@ ENTRY_ALIASED (test_and_set_16)
 END (test_and_set_16)
 
 
+#if HAVE_IFUNC
+/* ifunc implementations: Carries run-time dependence on the presence of further
+   architectural extensions. */
+
+ENTRY_FEAT (exchange_16, LSE128)
+	mov	tmp0, x0
+	mov	res0, in0
+	mov	res1, in1
+	cbnz	w4, 1f
+
+	/* RELAXED. */
+	/* swpp res0, res1, [tmp0] */
+	.inst	0x192180c0
+	ret
+1:
+	cmp	w4, ACQUIRE
+	b.hi	2f
+
+	/* ACQUIRE/CONSUME. */
+	/* swppa res0, res1, [tmp0] */
+	.inst	0x19a180c0
+	ret
+
+	/* RELEASE/ACQ_REL/SEQ_CST. */
+2:	/* swppal res0, res1, [tmp0] */
+	.inst	0x19e180c0
+	ret
+END_FEAT (exchange_16, LSE128)
+
+
+ENTRY_FEAT (fetch_or_16, LSE128)
+	mov	tmp0, x0
+	mov	res0, in0
+	mov	res1, in1
+	cbnz	w4, 1f
+
+	/* RELAXED. */
+	/* ldsetp res0, res1, [tmp0] */
+	.inst	0x192130c0
+	ret
+1:
+	cmp	w4, ACQUIRE
+	b.hi	2f
+
+	/* ACQUIRE/CONSUME. */
+	/* ldsetpa res0, res1, [tmp0] */
+	.inst	0x19a130c0
+	ret
+
+	/* RELEASE/ACQ_REL/SEQ_CST. */
+2:	/* ldsetpal res0, res1, [tmp0] */
+	.inst	0x19e130c0
+	ret
+END_FEAT (fetch_or_16, LSE128)
+
+
+ENTRY_FEAT (or_fetch_16, LSE128)
+	cbnz	w4, 1f
+	mov	tmp0, in0
+	mov	tmp1, in1
+
+	/* RELAXED. */
+	/* ldsetp in0, in1, [x0] */
+	.inst	0x19233002
+	orr	res0, in0, tmp0
+	orr	res1, in1, tmp1
+	ret
+1:
+	cmp	w4, ACQUIRE
+	b.hi	2f
+
+	/* ACQUIRE/CONSUME. */
+	/* ldsetpa in0, in1, [x0] */
+	.inst	0x19a33002
+	orr	res0, in0, tmp0
+	orr	res1, in1, tmp1
+	ret
+
+	/* RELEASE/ACQ_REL/SEQ_CST. */
+2:	/* ldsetpal in0, in1, [x0] */
+	.inst	0x19e33002
+	orr	res0, in0, tmp0
+	orr	res1, in1, tmp1
+	ret
+END_FEAT (or_fetch_16, LSE128)
+
+
+ENTRY_FEAT (fetch_and_16, LSE128)
+	mov	tmp0, x0
+	mvn	res0, in0
+	mvn	res1, in1
+	cbnz	w4, 1f
+
+	/* RELAXED. */
+	/* ldclrp res0, res1, [tmp0] */
+	.inst	0x192110c0
+	ret
+
+1:
+	cmp	w4, ACQUIRE
+	b.hi	2f
+
+	/* ACQUIRE/CONSUME. */
+	/* ldclrpa res0, res1, [tmp0] */
+	.inst	0x19a110c0
+	ret
+
+	/* RELEASE/ACQ_REL/SEQ_CST. */
+2:	/* ldclrpal res0, res1, [tmp0] */
+	.inst	0x19e110c0
+	ret
+END_FEAT (fetch_and_16, LSE128)
+
+
+ENTRY_FEAT (and_fetch_16, LSE128)
+	mvn	tmp0, in0
+	mvn	tmp0, in1
+	cbnz	w4, 1f
+
+	/* RELAXED. */
+	/* ldclrp tmp0, tmp1, [x0] */
+	.inst	0x19271006
+	and	res0, tmp0, in0
+	and	res1, tmp1, in1
+	ret
+
+1:
+	cmp	w4, ACQUIRE
+	b.hi	2f
+
+	/* ACQUIRE/CONSUME. */
+	/* ldclrpa tmp0, tmp1, [x0] */
+	.inst	0x19a71006
+	and	res0, tmp0, in0
+	and	res1, tmp1, in1
+	ret
+
+	/* RELEASE/ACQ_REL/SEQ_CST. */
+2:	/* ldclrpal tmp0, tmp1, [x5] */
+	.inst	0x19e710a6
+	and	res0, tmp0, in0
+	and	res1, tmp1, in1
+	ret
+END_FEAT (and_fetch_16, LSE128)
+
+
+ENTRY_FEAT (load_16, LSE2)
+	cbnz	w1, 1f
+
+	/* RELAXED. */
+	ldp	res0, res1, [x0]
+	ret
+1:
+	cmp	w1, SEQ_CST
+	b.eq	2f
+
+	/* ACQUIRE/CONSUME (Load-AcquirePC semantics). */
+	ldp	res0, res1, [x0]
+	dmb	ishld
+	ret
+
+	/* SEQ_CST. */
+2:	ldar	tmp0, [x0]	/* Block reordering with Store-Release instr. */
+	ldp	res0, res1, [x0]
+	dmb	ishld
+	ret
+END_FEAT (load_16, LSE2)
+
+
+ENTRY_FEAT (store_16, LSE2)
+	cbnz	w4, 1f
+
+	/* RELAXED. */
+	stp	in0, in1, [x0]
+	ret
+
+	/* RELEASE/SEQ_CST. */
+1:	ldxp	xzr, tmp0, [x0]
+	stlxp	w4, in0, in1, [x0]
+	cbnz	w4, 1b
+	ret
+END_FEAT (store_16, LSE2)
+
+
+ENTRY_FEAT (compare_exchange_16, LSE)
+	ldp	exp0, exp1, [x1]
+	mov	tmp0, exp0
+	mov	tmp1, exp1
+	cbz	w4, 2f
+	cmp	w4, RELEASE
+	b.hs	3f
+
+	/* ACQUIRE/CONSUME. */
+	caspa	exp0, exp1, in0, in1, [x0]
+0:
+	cmp	exp0, tmp0
+	ccmp	exp1, tmp1, 0, eq
+	bne	1f
+	mov	x0, 1
+	ret
+1:
+	stp	exp0, exp1, [x1]
+	mov	x0, 0
+	ret
+
+	/* RELAXED. */
+2:	casp	exp0, exp1, in0, in1, [x0]
+	b	0b
+
+	/* RELEASE. */
+3:	b.hi	4f
+	caspl	exp0, exp1, in0, in1, [x0]
+	b	0b
+
+	/* ACQ_REL/SEQ_CST. */
+4:	caspal	exp0, exp1, in0, in1, [x0]
+	b	0b
+END_FEAT (compare_exchange_16, LSE)
+#endif
+
 /* GNU_PROPERTY_AARCH64_* macros from elf.h for use in asm code. */
 #define FEATURE_1_AND 0xc0000000
 #define FEATURE_1_BTI 1
-- 
2.34.1