"Pop, Sebastian via Gcc-patches" <gcc-patches@gcc.gnu.org> writes: >> Yes this looks good to me (still needs maintainer approval). > > Thanks again Wilco for your review. > >> One minor nitpick, >> a few of the tests check for __aarch64_cas2 - this should be >> __aarch64_cas2_sync. > > Fixed in the attached patch. > >> Note the patch still needs an appropriate commit message. > > Added the following ChangeLog entry to the commit message. > > * config/aarch64/aarch64-protos.h (atomic_ool_names): Increase > dimension > of str array. > * config/aarch64/aarch64.cc (aarch64_atomic_ool_func): Call > memmodel_from_int and handle MEMMODEL_SYNC_*. > (DEF0): Add __aarch64_*_sync functions. > > testsuite/ > * gcc.target/aarch64/sync-comp-swap-ool.c: New. > * gcc.target/aarch64/sync-op-acquire-ool.c: New. > * gcc.target/aarch64/sync-op-full-ool.c: New. > * gcc.target/aarch64/target_attr_20.c: Update check. > * gcc.target/aarch64/target_attr_21.c: Same. > > libgcc/ > * config/aarch64/lse.S: Define BARRIER and handle memory MODEL 5. > * config/aarch64/t-lse: Add a 5th memory model for _sync functions.
OK, thanks. Richard > From 3b624598035e4e0c1aee89efaae28596a64b3d0d Mon Sep 17 00:00:00 2001 > From: Sebastian Pop <s...@amazon.com> > Date: Mon, 18 Apr 2022 15:13:20 +0000 > Subject: [PATCH] [AArch64] add barriers to ool __sync builtins > > * config/aarch64/aarch64-protos.h (atomic_ool_names): Increase dimension > of str array. > * config/aarch64/aarch64.cc (aarch64_atomic_ool_func): Call > memmodel_from_int and handle MEMMODEL_SYNC_*. > (DEF0): Add __aarch64_*_sync functions. > > testsuite/ > * gcc.target/aarch64/sync-comp-swap-ool.c: New. > * gcc.target/aarch64/sync-op-acquire-ool.c: New. > * gcc.target/aarch64/sync-op-full-ool.c: New. > * gcc.target/aarch64/target_attr_20.c: Update check. > * gcc.target/aarch64/target_attr_21.c: Same. > > libgcc/ > * config/aarch64/lse.S: Define BARRIER and handle memory MODEL 5. > * config/aarch64/t-lse: Add a 5th memory model for _sync functions. > --- > gcc/config/aarch64/aarch64-protos.h | 2 +- > gcc/config/aarch64/aarch64.cc | 12 ++++-- > .../gcc.target/aarch64/sync-comp-swap-ool.c | 6 +++ > .../gcc.target/aarch64/sync-op-acquire-ool.c | 6 +++ > .../gcc.target/aarch64/sync-op-full-ool.c | 9 ++++ > .../gcc.target/aarch64/target_attr_20.c | 2 +- > .../gcc.target/aarch64/target_attr_21.c | 2 +- > libgcc/config/aarch64/lse.S | 42 +++++++++++++++++-- > libgcc/config/aarch64/t-lse | 8 ++-- > 9 files changed, 75 insertions(+), 14 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/aarch64/sync-comp-swap-ool.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/sync-op-acquire-ool.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/sync-op-full-ool.c > > diff --git a/gcc/config/aarch64/aarch64-protos.h > b/gcc/config/aarch64/aarch64-protos.h > index 46bade28ed6..3ad5e77a1af 100644 > --- a/gcc/config/aarch64/aarch64-protos.h > +++ b/gcc/config/aarch64/aarch64-protos.h > @@ -1051,7 +1051,7 @@ bool aarch64_high_bits_all_ones_p (HOST_WIDE_INT); > > struct atomic_ool_names > { > - const char *str[5][4]; > + const char *str[5][5]; > }; > > rtx aarch64_atomic_ool_func(machine_mode mode, rtx model_rtx, > diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc > index 18f80499079..3ad11e84aae 100644 > --- a/gcc/config/aarch64/aarch64.cc > +++ b/gcc/config/aarch64/aarch64.cc > @@ -22670,14 +22670,14 @@ aarch64_emit_unlikely_jump (rtx insn) > add_reg_br_prob_note (jump, profile_probability::very_unlikely ()); > } > > -/* We store the names of the various atomic helpers in a 5x4 array. > +/* We store the names of the various atomic helpers in a 5x5 array. > Return the libcall function given MODE, MODEL and NAMES. */ > > rtx > aarch64_atomic_ool_func(machine_mode mode, rtx model_rtx, > const atomic_ool_names *names) > { > - memmodel model = memmodel_base (INTVAL (model_rtx)); > + memmodel model = memmodel_from_int (INTVAL (model_rtx)); > int mode_idx, model_idx; > > switch (mode) > @@ -22717,6 +22717,11 @@ aarch64_atomic_ool_func(machine_mode mode, rtx > model_rtx, > case MEMMODEL_SEQ_CST: > model_idx = 3; > break; > + case MEMMODEL_SYNC_ACQUIRE: > + case MEMMODEL_SYNC_RELEASE: > + case MEMMODEL_SYNC_SEQ_CST: > + model_idx = 4; > + break; > default: > gcc_unreachable (); > } > @@ -22729,7 +22734,8 @@ aarch64_atomic_ool_func(machine_mode mode, rtx > model_rtx, > { "__aarch64_" #B #N "_relax", \ > "__aarch64_" #B #N "_acq", \ > "__aarch64_" #B #N "_rel", \ > - "__aarch64_" #B #N "_acq_rel" } > + "__aarch64_" #B #N "_acq_rel", \ > + "__aarch64_" #B #N "_sync" } > > #define DEF4(B) DEF0(B, 1), DEF0(B, 2), DEF0(B, 4), DEF0(B, 8), \ > { NULL, NULL, NULL, NULL } > diff --git a/gcc/testsuite/gcc.target/aarch64/sync-comp-swap-ool.c > b/gcc/testsuite/gcc.target/aarch64/sync-comp-swap-ool.c > new file mode 100644 > index 00000000000..372f4aa8746 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sync-comp-swap-ool.c > @@ -0,0 +1,6 @@ > +/* { dg-do compile } */ > +/* { dg-options "-march=armv8-a+nolse -O2 -fno-ipa-icf -moutline-atomics" } > */ > + > +#include "sync-comp-swap.x" > + > +/* { dg-final { scan-assembler-times "bl.*__aarch64_cas4_sync" 1 } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/sync-op-acquire-ool.c > b/gcc/testsuite/gcc.target/aarch64/sync-op-acquire-ool.c > new file mode 100644 > index 00000000000..95d9c56b5e1 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sync-op-acquire-ool.c > @@ -0,0 +1,6 @@ > +/* { dg-do compile } */ > +/* { dg-options "-march=armv8-a+nolse -O2 -moutline-atomics" } */ > + > +#include "sync-op-acquire.x" > + > +/* { dg-final { scan-assembler-times "bl.*__aarch64_swp4_sync" 1 } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/sync-op-full-ool.c > b/gcc/testsuite/gcc.target/aarch64/sync-op-full-ool.c > new file mode 100644 > index 00000000000..2f3881d9755 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sync-op-full-ool.c > @@ -0,0 +1,9 @@ > +/* { dg-do compile } */ > +/* { dg-options "-march=armv8-a+nolse -O2 -moutline-atomics" } */ > + > +#include "sync-op-full.x" > + > +/* { dg-final { scan-assembler-times "bl.*__aarch64_ldadd4_sync" 1 } } */ > +/* { dg-final { scan-assembler-times "bl.*__aarch64_ldclr4_sync" 1 } } */ > +/* { dg-final { scan-assembler-times "bl.*__aarch64_ldeor4_sync" 1 } } */ > +/* { dg-final { scan-assembler-times "bl.*__aarch64_ldset4_sync" 1 } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/target_attr_20.c > b/gcc/testsuite/gcc.target/aarch64/target_attr_20.c > index 509fb039e84..c9454fc420b 100644 > --- a/gcc/testsuite/gcc.target/aarch64/target_attr_20.c > +++ b/gcc/testsuite/gcc.target/aarch64/target_attr_20.c > @@ -24,4 +24,4 @@ bar (void) > } > } > > -/* { dg-final { scan-assembler-not "bl.*__aarch64_cas2_acq_rel" } } */ > +/* { dg-final { scan-assembler-not "bl.*__aarch64_cas2_sync" } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/target_attr_21.c > b/gcc/testsuite/gcc.target/aarch64/target_attr_21.c > index acace4c8f2a..b8e56223b02 100644 > --- a/gcc/testsuite/gcc.target/aarch64/target_attr_21.c > +++ b/gcc/testsuite/gcc.target/aarch64/target_attr_21.c > @@ -24,4 +24,4 @@ bar (void) > } > } > > -/* { dg-final { scan-assembler-times "bl.*__aarch64_cas2_acq_rel" 1 } } */ > +/* { dg-final { scan-assembler-times "bl.*__aarch64_cas2_sync" 1 } } */ > diff --git a/libgcc/config/aarch64/lse.S b/libgcc/config/aarch64/lse.S > index c353ec2215b..9c29cf08b59 100644 > --- a/libgcc/config/aarch64/lse.S > +++ b/libgcc/config/aarch64/lse.S > @@ -87,24 +87,44 @@ see the files COPYING3 and COPYING.RUNTIME respectively. > If not, see > # define L > # define M 0x000000 > # define N 0x000000 > +# define BARRIER > #elif MODEL == 2 > # define SUFF _acq > # define A a > # define L > # define M 0x400000 > # define N 0x800000 > +# define BARRIER > #elif MODEL == 3 > # define SUFF _rel > # define A > # define L l > # define M 0x008000 > # define N 0x400000 > +# define BARRIER > #elif MODEL == 4 > # define SUFF _acq_rel > # define A a > # define L l > # define M 0x408000 > # define N 0xc00000 > +# define BARRIER > +#elif MODEL == 5 > +# define SUFF _sync > +#ifdef L_swp > +/* swp has _acq semantics. */ > +# define A a > +# define L > +# define M 0x400000 > +# define N 0x800000 > +#else > +/* All other _sync functions have _seq semantics. */ > +# define A a > +# define L l > +# define M 0x408000 > +# define N 0xc00000 > +#endif > +# define BARRIER dmb ish > #else > # error > #endif > @@ -127,7 +147,12 @@ see the files COPYING3 and COPYING.RUNTIME respectively. > If not, see > #endif > > #define NAME(BASE) glue4(__aarch64_, BASE, SIZE, SUFF) > -#define LDXR glue4(ld, A, xr, S) > +#if MODEL == 5 > +/* Drop A for _sync functions. */ > +# define LDXR glue3(ld, xr, S) > +#else > +# define LDXR glue4(ld, A, xr, S) > +#endif > #define STXR glue4(st, L, xr, S) > > /* Temporary registers used. Other than these, only the return value > @@ -183,10 +208,16 @@ STARTFN NAME(cas) > bne 1f > STXR w(tmp1), s(1), [x2] > cbnz w(tmp1), 0b > -1: ret > +1: BARRIER > + ret > > #else > -#define LDXP glue3(ld, A, xp) > +#if MODEL == 5 > +/* Drop A for _sync functions. */ > +# define LDXP glue2(ld, xp) > +#else > +# define LDXP glue3(ld, A, xp) > +#endif > #define STXP glue3(st, L, xp) > #ifdef HAVE_AS_LSE > # define CASP glue3(casp, A, L) x0, x1, x2, x3, [x4] > @@ -205,7 +236,8 @@ STARTFN NAME(cas) > bne 1f > STXP w(tmp2), x2, x3, [x4] > cbnz w(tmp2), 0b > -1: ret > +1: BARRIER > + ret > > #endif > > @@ -229,6 +261,7 @@ STARTFN NAME(swp) > 0: LDXR s(0), [x1] > STXR w(tmp1), s(tmp0), [x1] > cbnz w(tmp1), 0b > + BARRIER > ret > > ENDFN NAME(swp) > @@ -273,6 +306,7 @@ STARTFN NAME(LDNM) > OP s(tmp1), s(0), s(tmp0) > STXR w(tmp2), s(tmp1), [x1] > cbnz w(tmp2), 0b > + BARRIER > ret > > ENDFN NAME(LDNM) > diff --git a/libgcc/config/aarch64/t-lse b/libgcc/config/aarch64/t-lse > index 790cada3315..624daf7eddf 100644 > --- a/libgcc/config/aarch64/t-lse > +++ b/libgcc/config/aarch64/t-lse > @@ -18,13 +18,13 @@ > # along with GCC; see the file COPYING3. If not see > # <http://www.gnu.org/licenses/>. > > -# Compare-and-swap has 5 sizes and 4 memory models. > +# Compare-and-swap has 5 sizes and 5 memory models. > S0 := $(foreach s, 1 2 4 8 16, $(addsuffix _$(s), cas)) > -O0 := $(foreach m, 1 2 3 4, $(addsuffix _$(m)$(objext), $(S0))) > +O0 := $(foreach m, 1 2 3 4 5, $(addsuffix _$(m)$(objext), $(S0))) > > -# Swap, Load-and-operate have 4 sizes and 4 memory models > +# Swap, Load-and-operate have 4 sizes and 5 memory models > S1 := $(foreach s, 1 2 4 8, $(addsuffix _$(s), swp ldadd ldclr ldeor ldset)) > -O1 := $(foreach m, 1 2 3 4, $(addsuffix _$(m)$(objext), $(S1))) > +O1 := $(foreach m, 1 2 3 4 5, $(addsuffix _$(m)$(objext), $(S1))) > > LSE_OBJS := $(O0) $(O1)