The armv9.4-a architectural revision adds three new atomic operations associated with the LSE128 feature:
* LDCLRP - Atomic AND NOT (bitclear) of a location with a 128-bit value held in a pair of registers, with original data loaded into the same 2 registers. * LDSETP - Atomic OR (bitset) of a location with a 128-bit value held in a pair of registers, with original data loaded into the same 2 registers. * SWPP - Atomic swap of one 128-bit value with a 128-bit value held in a pair of registers. This patch adds the logic required to make use of these when the architectural feature is present and a suitable assembler is available. In order to do this, the following changes are made: 1. Add a configure-time check for LSE128 support in the assembler. 2. Edit host-config.h so that when N == 16, nifunc = 2. 3. Where available due to LSE128, implement the second ifunc, making use of the novel instructions. 4. For atomic functions unable to make use of these new instructions, define a new alias which causes the _i1 function variant to point ahead to the corresponding _i2 implementation. libatomic/ChangeLog: * Makefile.am (AM_CPPFLAGS): Add conditional setting of -DHAVE_FEAT_LSE128. * acinclude.m4 (LIBAT_TEST_FEAT_LSE128): New. * config/linux/aarch64/atomic_16.S (LSE128): New macro definition. (libat_exchange_16): New LSE128 variant. (libat_fetch_or_16): Likewise. (libat_or_fetch_16): Likewise. (libat_fetch_and_16): Likewise. (libat_and_fetch_16): Likewise. * config/linux/aarch64/host-config.h (IFUNC_COND_2): New. (IFUNC_NCOND): Add operand size checking. (has_lse2): Renamed from `ifunc1`. (has_lse128): New. (HAS_LSE128): Likewise. * configure.ac: Add call to LIBAT_TEST_FEAT_LSE128. * configure (ac_subst_vars): Regenerated via autoreconf. * Makefile.in: Likewise. * auto-config.h.in: Likewise. 
--- libatomic/Makefile.am | 3 + libatomic/Makefile.in | 1 + libatomic/acinclude.m4 | 19 +++ libatomic/auto-config.h.in | 3 + libatomic/config/linux/aarch64/atomic_16.S | 170 ++++++++++++++++++- libatomic/config/linux/aarch64/host-config.h | 27 ++- libatomic/configure | 59 ++++++- libatomic/configure.ac | 1 + 8 files changed, 274 insertions(+), 9 deletions(-) diff --git a/libatomic/Makefile.am b/libatomic/Makefile.am index c0b8dea5037..24e843db67d 100644 --- a/libatomic/Makefile.am +++ b/libatomic/Makefile.am @@ -130,6 +130,9 @@ libatomic_la_LIBADD = $(foreach s,$(SIZES),$(addsuffix _$(s)_.lo,$(SIZEOBJS))) ## On a target-specific basis, include alternates to be selected by IFUNC. if HAVE_IFUNC if ARCH_AARCH64_LINUX +if ARCH_AARCH64_HAVE_LSE128 +AM_CPPFLAGS = -DHAVE_FEAT_LSE128 +endif IFUNC_OPTIONS = -march=armv8-a+lse libatomic_la_LIBADD += $(foreach s,$(SIZES),$(addsuffix _$(s)_1_.lo,$(SIZEOBJS))) libatomic_la_SOURCES += atomic_16.S diff --git a/libatomic/Makefile.in b/libatomic/Makefile.in index dc2330b91fd..cd48fa21334 100644 --- a/libatomic/Makefile.in +++ b/libatomic/Makefile.in @@ -452,6 +452,7 @@ M_SRC = $(firstword $(filter %/$(M_FILE), $(all_c_files))) libatomic_la_LIBADD = $(foreach s,$(SIZES),$(addsuffix \ _$(s)_.lo,$(SIZEOBJS))) $(am__append_1) $(am__append_3) \ $(am__append_4) $(am__append_5) +@ARCH_AARCH64_HAVE_LSE128_TRUE@@ARCH_AARCH64_LINUX_TRUE@@HAVE_IFUNC_TRUE@AM_CPPFLAGS = -DHAVE_FEAT_LSE128 @ARCH_AARCH64_LINUX_TRUE@@HAVE_IFUNC_TRUE@IFUNC_OPTIONS = -march=armv8-a+lse @ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@IFUNC_OPTIONS = -march=armv7-a+fp -DHAVE_KERNEL64 @ARCH_I386_TRUE@@HAVE_IFUNC_TRUE@IFUNC_OPTIONS = -march=i586 diff --git a/libatomic/acinclude.m4 b/libatomic/acinclude.m4 index f35ab5b60a5..4197db8f404 100644 --- a/libatomic/acinclude.m4 +++ b/libatomic/acinclude.m4 @@ -83,6 +83,25 @@ AC_DEFUN([LIBAT_TEST_ATOMIC_BUILTIN],[ ]) ]) +dnl +dnl Test if the host assembler supports armv9.4-a LSE128 insns. 
+dnl +AC_DEFUN([LIBAT_TEST_FEAT_LSE128],[ + AC_CACHE_CHECK([for armv9.4-a LSE128 insn support], + [libat_cv_have_feat_lse128],[ + AC_LANG_CONFTEST([AC_LANG_PROGRAM([],[asm(".arch armv9-a+lse128")])]) + if AC_TRY_EVAL(ac_link); then + eval libat_cv_have_feat_lse128=yes + else + eval libat_cv_have_feat_lse128=no + fi + rm -f conftest* + ]) + LIBAT_DEFINE_YESNO([HAVE_FEAT_LSE128], [$libat_cv_have_feat_lse128], + [Have LSE128 support for 16 byte integers.]) + AM_CONDITIONAL([ARCH_AARCH64_HAVE_LSE128], [test x$libat_cv_have_feat_lse128 = xyes]) +]) + dnl dnl Test if we have __atomic_load and __atomic_store for mode $1, size $2 dnl diff --git a/libatomic/auto-config.h.in b/libatomic/auto-config.h.in index ab3424a759e..7c78933b07d 100644 --- a/libatomic/auto-config.h.in +++ b/libatomic/auto-config.h.in @@ -105,6 +105,9 @@ /* Define to 1 if you have the <dlfcn.h> header file. */ #undef HAVE_DLFCN_H +/* Have LSE128 support for 16 byte integers. */ +#undef HAVE_FEAT_LSE128 + /* Define to 1 if you have the <fenv.h> header file. */ #undef HAVE_FENV_H diff --git a/libatomic/config/linux/aarch64/atomic_16.S b/libatomic/config/linux/aarch64/atomic_16.S index 3f6225830e6..44a773031f8 100644 --- a/libatomic/config/linux/aarch64/atomic_16.S +++ b/libatomic/config/linux/aarch64/atomic_16.S @@ -34,10 +34,14 @@ writes, this will be true when using atomics in actual code. The libat_<op>_16 entry points are ARMv8.0. - The libat_<op>_16_i1 entry points are used when LSE2 is available. */ - + The libat_<op>_16_i1 entry points are used when LSE128 is available. + The libat_<op>_16_i2 entry points are used when LSE2 is available. 
*/ +#if HAVE_FEAT_LSE128 + .arch armv9-a+lse128 /* Same string the configure probe assembled. */ +#else .arch armv8-a+lse +#endif #define ENTRY(name, feat) \ ENTRY1(name, feat) @@ -66,7 +70,8 @@ name##feat: \ .set alias##from, alias##to; #define CORE -#define LSE2 _i1 +#define LSE128 _i1 +#define LSE2 _i2 #define res0 x0 #define res1 x1 @@ -201,6 +206,31 @@ ENTRY (libat_exchange_16, CORE) END (libat_exchange_16, CORE) +#if HAVE_FEAT_LSE128 +ENTRY (libat_exchange_16, LSE128) + mov tmp0, x0 + mov res0, in0 + mov res1, in1 + cbnz w4, 1f + + /* RELAXED. */ + swpp res0, res1, [tmp0] + ret +1: + cmp w4, ACQUIRE + b.hi 2f + + /* ACQUIRE/CONSUME. */ + swppa res0, res1, [tmp0] + ret + + /* RELEASE/ACQ_REL/SEQ_CST. */ +2: swppal res0, res1, [tmp0] + ret +END (libat_exchange_16, LSE128) +#endif + + ENTRY (libat_compare_exchange_16, CORE) ldp exp0, exp1, [x1] cbz w4, 3f @@ -389,6 +419,31 @@ ENTRY (libat_fetch_or_16, CORE) END (libat_fetch_or_16, CORE) +#if HAVE_FEAT_LSE128 +ENTRY (libat_fetch_or_16, LSE128) + mov tmp0, x0 + mov res0, in0 + mov res1, in1 + cbnz w4, 1f + + /* RELAXED. */ + ldsetp res0, res1, [tmp0] + ret +1: + cmp w4, ACQUIRE + b.hi 2f + + /* ACQUIRE/CONSUME. */ + ldsetpa res0, res1, [tmp0] + ret + + /* RELEASE/ACQ_REL/SEQ_CST. */ +2: ldsetpal res0, res1, [tmp0] + ret +END (libat_fetch_or_16, LSE128) +#endif + + ENTRY (libat_or_fetch_16, CORE) mov x5, x0 cbnz w4, 2f @@ -411,6 +466,36 @@ ENTRY (libat_or_fetch_16, CORE) END (libat_or_fetch_16, CORE) +#if HAVE_FEAT_LSE128 +ENTRY (libat_or_fetch_16, LSE128) + mov tmp0, in0 /* Save the operand before dispatching: every path ORs it back in. */ + mov tmp1, in1 + cbnz w4, 1f + + /* RELAXED. */ + ldsetp in0, in1, [x0] + orr res0, in0, tmp0 + orr res1, in1, tmp1 + ret +1: + cmp w4, ACQUIRE + b.hi 2f + + /* ACQUIRE/CONSUME. */ + ldsetpa in0, in1, [x0] + orr res0, in0, tmp0 + orr res1, in1, tmp1 + ret + + /* RELEASE/ACQ_REL/SEQ_CST. 
*/ +2: ldsetpal in0, in1, [x0] + orr res0, in0, tmp0 + orr res1, in1, tmp1 + ret +END (libat_or_fetch_16, LSE128) +#endif + + ENTRY (libat_fetch_and_16, CORE) mov x5, x0 cbnz w4, 2f @@ -433,6 +518,32 @@ ENTRY (libat_fetch_and_16, CORE) END (libat_fetch_and_16, CORE) +#if HAVE_FEAT_LSE128 +ENTRY (libat_fetch_and_16, LSE128) + mov tmp0, x0 + mvn res0, in0 + mvn res1, in1 + cbnz w4, 1f + + /* RELAXED. */ + ldclrp res0, res1, [tmp0] + ret + +1: + cmp w4, ACQUIRE + b.hi 2f + + /* ACQUIRE/CONSUME. */ + ldclrpa res0, res1, [tmp0] + ret + + /* RELEASE/ACQ_REL/SEQ_CST. */ +2: ldclrpal res0, res1, [tmp0] + ret +END (libat_fetch_and_16, LSE128) +#endif + + ENTRY (libat_and_fetch_16, CORE) mov x5, x0 cbnz w4, 2f @@ -455,6 +566,37 @@ ENTRY (libat_and_fetch_16, CORE) END (libat_and_fetch_16, CORE) +#if HAVE_FEAT_LSE128 +ENTRY (libat_and_fetch_16, LSE128) + mvn tmp0, in0 /* LDCLRP clears the bits that are set, so pass ~in to get an AND. */ + mvn tmp1, in1 + cbnz w4, 1f + + /* RELAXED. */ + ldclrp tmp0, tmp1, [x0] + and res0, tmp0, in0 + and res1, tmp1, in1 + ret + +1: + cmp w4, ACQUIRE + b.hi 2f + + /* ACQUIRE/CONSUME. */ + ldclrpa tmp0, tmp1, [x0] + and res0, tmp0, in0 + and res1, tmp1, in1 + ret + + /* RELEASE/ACQ_REL/SEQ_CST. */ +2: ldclrpal tmp0, tmp1, [x0] + and res0, tmp0, in0 + and res1, tmp1, in1 + ret +END (libat_and_fetch_16, LSE128) +#endif + + ENTRY (libat_fetch_xor_16, CORE) mov x5, x0 cbnz w4, 2f @@ -560,6 +702,28 @@ ENTRY (libat_test_and_set_16, CORE) END (libat_test_and_set_16, CORE) +/* Alias entry points which are the same in LSE2 and LSE128. 
*/ + +#if !HAVE_FEAT_LSE128 +ALIAS (libat_exchange_16, LSE128, LSE2) +ALIAS (libat_fetch_or_16, LSE128, LSE2) +ALIAS (libat_fetch_and_16, LSE128, LSE2) +ALIAS (libat_or_fetch_16, LSE128, LSE2) +ALIAS (libat_and_fetch_16, LSE128, LSE2) +#endif +ALIAS (libat_load_16, LSE128, LSE2) +ALIAS (libat_store_16, LSE128, LSE2) +ALIAS (libat_compare_exchange_16, LSE128, LSE2) +ALIAS (libat_fetch_add_16, LSE128, LSE2) +ALIAS (libat_add_fetch_16, LSE128, LSE2) +ALIAS (libat_fetch_sub_16, LSE128, LSE2) +ALIAS (libat_sub_fetch_16, LSE128, LSE2) +ALIAS (libat_fetch_xor_16, LSE128, LSE2) +ALIAS (libat_xor_fetch_16, LSE128, LSE2) +ALIAS (libat_fetch_nand_16, LSE128, LSE2) +ALIAS (libat_nand_fetch_16, LSE128, LSE2) +ALIAS (libat_test_and_set_16, LSE128, LSE2) + /* Alias entry points which are the same in baseline and LSE2. */ ALIAS (libat_exchange_16, LSE2, CORE) diff --git a/libatomic/config/linux/aarch64/host-config.h b/libatomic/config/linux/aarch64/host-config.h index 30ef21c7715..d873e91b1c9 100644 --- a/libatomic/config/linux/aarch64/host-config.h +++ b/libatomic/config/linux/aarch64/host-config.h @@ -26,14 +26,17 @@ #ifdef HWCAP_USCAT # if N == 16 -# define IFUNC_COND_1 (ifunc1 (hwcap)) +# define IFUNC_COND_1 (has_lse128 (hwcap)) +# define IFUNC_COND_2 (has_lse2 (hwcap)) +# define IFUNC_NCOND(N) 2 # else -# define IFUNC_COND_1 (hwcap & HWCAP_ATOMICS) +# define IFUNC_COND_1 (hwcap & HWCAP_ATOMICS) +# define IFUNC_NCOND(N) 1 # endif #else # define IFUNC_COND_1 (false) +# define IFUNC_NCOND(N) 1 #endif -#define IFUNC_NCOND(N) (1) #endif /* HAVE_IFUNC */ @@ -56,7 +59,7 @@ #define MIDR_PARTNUM(midr) (((midr) >> 4) & 0xfff) static inline bool -ifunc1 (unsigned long hwcap) +has_lse2 (unsigned long hwcap) { if (hwcap & HWCAP_USCAT) return true; @@ -69,6 +72,22 @@ ifunc1 (unsigned long hwcap) return true; return false; } + +/* LSE128 atomic support encoded in ID_AA64ISAR0_EL1.Atomic, + bits[23:20]. The expected value is 0b0011. Check that. 
*/ +#define HAS_LSE128() ({ \ + unsigned long val; \ + asm volatile ("mrs %0, ID_AA64ISAR0_EL1" : "=r" (val)); \ + (val & 0xf00000) >= 0x300000; \ + }) + +static inline bool +has_lse128 (unsigned long hwcap) +{ + if (has_lse2 (hwcap) && HAS_LSE128 ()) + return true; + return false; +} #endif #include_next <host-config.h> diff --git a/libatomic/configure b/libatomic/configure index d579bab96f8..ee3bbb97d69 100755 --- a/libatomic/configure +++ b/libatomic/configure @@ -657,6 +657,8 @@ LIBAT_BUILD_VERSIONED_SHLIB_TRUE OPT_LDFLAGS SECTION_LDFLAGS SYSROOT_CFLAGS_FOR_TARGET +ARCH_AARCH64_HAVE_LSE128_FALSE +ARCH_AARCH64_HAVE_LSE128_TRUE enable_aarch64_lse libtool_VERSION ENABLE_DARWIN_AT_RPATH_FALSE @@ -11456,7 +11458,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF -#line 11459 "configure" +#line 11461 "configure" #include "confdefs.h" #if HAVE_DLFCN_H @@ -11562,7 +11564,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF -#line 11565 "configure" +#line 11567 "configure" #include "confdefs.h" #if HAVE_DLFCN_H @@ -11926,6 +11928,55 @@ ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $ ac_compiler_gnu=$ac_cv_c_compiler_gnu + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for armv9.4-a LSE128 insn support" >&5 +$as_echo_n "checking for armv9.4-a LSE128 insn support... " >&6; } +if ${libat_cv_have_feat_lse128+:} false; then : + $as_echo_n "(cached) " >&6 +else + + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ +asm(".arch armv9-a+lse128") + ; + return 0; +} +_ACEOF + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_link\""; } >&5 + (eval $ac_link) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; }; then + eval libat_cv_have_feat_lse128=yes + else + eval libat_cv_have_feat_lse128=no + fi + rm -f conftest* + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libat_cv_have_feat_lse128" >&5 +$as_echo "$libat_cv_have_feat_lse128" >&6; } + + yesno=`echo $libat_cv_have_feat_lse128 | tr 'yesno' '1 0 '` + +cat >>confdefs.h <<_ACEOF +#define HAVE_FEAT_LSE128 $yesno +_ACEOF + + + if test x$libat_cv_have_feat_lse128 = xyes; then + ARCH_AARCH64_HAVE_LSE128_TRUE= + ARCH_AARCH64_HAVE_LSE128_FALSE='#' +else + ARCH_AARCH64_HAVE_LSE128_TRUE='#' + ARCH_AARCH64_HAVE_LSE128_FALSE= +fi + + ;; esac @@ -15989,6 +16040,10 @@ if test -z "${ENABLE_DARWIN_AT_RPATH_TRUE}" && test -z "${ENABLE_DARWIN_AT_RPATH as_fn_error $? "conditional \"ENABLE_DARWIN_AT_RPATH\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi +if test -z "${ARCH_AARCH64_HAVE_LSE128_TRUE}" && test -z "${ARCH_AARCH64_HAVE_LSE128_FALSE}"; then + as_fn_error $? "conditional \"ARCH_AARCH64_HAVE_LSE128\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi if test -z "${LIBAT_BUILD_VERSIONED_SHLIB_TRUE}" && test -z "${LIBAT_BUILD_VERSIONED_SHLIB_FALSE}"; then as_fn_error $? "conditional \"LIBAT_BUILD_VERSIONED_SHLIB\" was never defined. diff --git a/libatomic/configure.ac b/libatomic/configure.ac index 5f2821ac3f4..b2fe68d7d0f 100644 --- a/libatomic/configure.ac +++ b/libatomic/configure.ac @@ -169,6 +169,7 @@ AC_MSG_RESULT([$target_thread_file]) case "$target" in *aarch64*) ACX_PROG_CC_WARNING_OPTS([-march=armv8-a+lse],[enable_aarch64_lse]) + LIBAT_TEST_FEAT_LSE128() ;; esac -- 2.42.0