"Pop, Sebastian via Gcc-patches" <gcc-patches@gcc.gnu.org> writes:
>> Yes this looks good to me (still needs maintainer approval). 
>
> Thanks again Wilco for your review.
>
>> One minor nitpick,
>> a few of the tests check for __aarch64_cas2 - this should be 
>> __aarch64_cas2_sync.
>
> Fixed in the attached patch.
>
>> Note the patch still needs an appropriate commit message.
>
> Added the following ChangeLog entry to the commit message.
>
>         * config/aarch64/aarch64-protos.h (atomic_ool_names): Increase 
> dimension
>         of str array.
>         * config/aarch64/aarch64.cc (aarch64_atomic_ool_func): Call
>         memmodel_from_int and handle MEMMODEL_SYNC_*.
>         (DEF0): Add __aarch64_*_sync functions.
>
> testsuite/
>         * gcc.target/aarch64/sync-comp-swap-ool.c: New.
>         * gcc.target/aarch64/sync-op-acquire-ool.c: New.
>         * gcc.target/aarch64/sync-op-full-ool.c: New.
>         * gcc.target/aarch64/target_attr_20.c: Update check.
>         * gcc.target/aarch64/target_attr_21.c: Same.
>
> libgcc/
>         * config/aarch64/lse.S: Define BARRIER and handle memory MODEL 5.
>         * config/aarch64/t-lse: Add a 5th memory model for _sync functions.

OK, thanks.

Richard

> From 3b624598035e4e0c1aee89efaae28596a64b3d0d Mon Sep 17 00:00:00 2001
> From: Sebastian Pop <s...@amazon.com>
> Date: Mon, 18 Apr 2022 15:13:20 +0000
> Subject: [PATCH] [AArch64] add barriers to ool __sync builtins
>
>       * config/aarch64/aarch64-protos.h (atomic_ool_names): Increase dimension
>       of str array.
>       * config/aarch64/aarch64.cc (aarch64_atomic_ool_func): Call
>       memmodel_from_int and handle MEMMODEL_SYNC_*.
>       (DEF0): Add __aarch64_*_sync functions.
>
> testsuite/
>       * gcc.target/aarch64/sync-comp-swap-ool.c: New.
>       * gcc.target/aarch64/sync-op-acquire-ool.c: New.
>       * gcc.target/aarch64/sync-op-full-ool.c: New.
>       * gcc.target/aarch64/target_attr_20.c: Update check.
>       * gcc.target/aarch64/target_attr_21.c: Same.
>
> libgcc/
>       * config/aarch64/lse.S: Define BARRIER and handle memory MODEL 5.
>       * config/aarch64/t-lse: Add a 5th memory model for _sync functions.
> ---
>  gcc/config/aarch64/aarch64-protos.h           |  2 +-
>  gcc/config/aarch64/aarch64.cc                 | 12 ++++--
>  .../gcc.target/aarch64/sync-comp-swap-ool.c   |  6 +++
>  .../gcc.target/aarch64/sync-op-acquire-ool.c  |  6 +++
>  .../gcc.target/aarch64/sync-op-full-ool.c     |  9 ++++
>  .../gcc.target/aarch64/target_attr_20.c       |  2 +-
>  .../gcc.target/aarch64/target_attr_21.c       |  2 +-
>  libgcc/config/aarch64/lse.S                   | 42 +++++++++++++++++--
>  libgcc/config/aarch64/t-lse                   |  8 ++--
>  9 files changed, 75 insertions(+), 14 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sync-comp-swap-ool.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sync-op-acquire-ool.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sync-op-full-ool.c
>
> diff --git a/gcc/config/aarch64/aarch64-protos.h 
> b/gcc/config/aarch64/aarch64-protos.h
> index 46bade28ed6..3ad5e77a1af 100644
> --- a/gcc/config/aarch64/aarch64-protos.h
> +++ b/gcc/config/aarch64/aarch64-protos.h
> @@ -1051,7 +1051,7 @@ bool aarch64_high_bits_all_ones_p (HOST_WIDE_INT);
>  
>  struct atomic_ool_names
>  {
> -    const char *str[5][4];
> +    const char *str[5][5];
>  };
>  
>  rtx aarch64_atomic_ool_func(machine_mode mode, rtx model_rtx,
> diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
> index 18f80499079..3ad11e84aae 100644
> --- a/gcc/config/aarch64/aarch64.cc
> +++ b/gcc/config/aarch64/aarch64.cc
> @@ -22670,14 +22670,14 @@ aarch64_emit_unlikely_jump (rtx insn)
>    add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
>  }
>  
> -/* We store the names of the various atomic helpers in a 5x4 array.
> +/* We store the names of the various atomic helpers in a 5x5 array.
>     Return the libcall function given MODE, MODEL and NAMES.  */
>  
>  rtx
>  aarch64_atomic_ool_func(machine_mode mode, rtx model_rtx,
>                       const atomic_ool_names *names)
>  {
> -  memmodel model = memmodel_base (INTVAL (model_rtx));
> +  memmodel model = memmodel_from_int (INTVAL (model_rtx));
>    int mode_idx, model_idx;
>  
>    switch (mode)
> @@ -22717,6 +22717,11 @@ aarch64_atomic_ool_func(machine_mode mode, rtx 
> model_rtx,
>      case MEMMODEL_SEQ_CST:
>        model_idx = 3;
>        break;
> +    case MEMMODEL_SYNC_ACQUIRE:
> +    case MEMMODEL_SYNC_RELEASE:
> +    case MEMMODEL_SYNC_SEQ_CST:
> +      model_idx = 4;
> +      break;
>      default:
>        gcc_unreachable ();
>      }
> @@ -22729,7 +22734,8 @@ aarch64_atomic_ool_func(machine_mode mode, rtx 
> model_rtx,
>    { "__aarch64_" #B #N "_relax", \
>      "__aarch64_" #B #N "_acq", \
>      "__aarch64_" #B #N "_rel", \
> -    "__aarch64_" #B #N "_acq_rel" }
> +    "__aarch64_" #B #N "_acq_rel", \
> +    "__aarch64_" #B #N "_sync" }
>  
>  #define DEF4(B)  DEF0(B, 1), DEF0(B, 2), DEF0(B, 4), DEF0(B, 8), \
>                { NULL, NULL, NULL, NULL }
> diff --git a/gcc/testsuite/gcc.target/aarch64/sync-comp-swap-ool.c 
> b/gcc/testsuite/gcc.target/aarch64/sync-comp-swap-ool.c
> new file mode 100644
> index 00000000000..372f4aa8746
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sync-comp-swap-ool.c
> @@ -0,0 +1,6 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=armv8-a+nolse -O2 -fno-ipa-icf -moutline-atomics" } 
> */
> +
> +#include "sync-comp-swap.x"
> +
> +/* { dg-final { scan-assembler-times "bl.*__aarch64_cas4_sync" 1 } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sync-op-acquire-ool.c 
> b/gcc/testsuite/gcc.target/aarch64/sync-op-acquire-ool.c
> new file mode 100644
> index 00000000000..95d9c56b5e1
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sync-op-acquire-ool.c
> @@ -0,0 +1,6 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=armv8-a+nolse -O2 -moutline-atomics" } */
> +
> +#include "sync-op-acquire.x"
> +
> +/* { dg-final { scan-assembler-times "bl.*__aarch64_swp4_sync" 1 } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sync-op-full-ool.c 
> b/gcc/testsuite/gcc.target/aarch64/sync-op-full-ool.c
> new file mode 100644
> index 00000000000..2f3881d9755
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sync-op-full-ool.c
> @@ -0,0 +1,9 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=armv8-a+nolse -O2 -moutline-atomics" } */
> +
> +#include "sync-op-full.x"
> +
> +/* { dg-final { scan-assembler-times "bl.*__aarch64_ldadd4_sync" 1 } } */
> +/* { dg-final { scan-assembler-times "bl.*__aarch64_ldclr4_sync" 1 } } */
> +/* { dg-final { scan-assembler-times "bl.*__aarch64_ldeor4_sync" 1 } } */
> +/* { dg-final { scan-assembler-times "bl.*__aarch64_ldset4_sync" 1 } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/target_attr_20.c 
> b/gcc/testsuite/gcc.target/aarch64/target_attr_20.c
> index 509fb039e84..c9454fc420b 100644
> --- a/gcc/testsuite/gcc.target/aarch64/target_attr_20.c
> +++ b/gcc/testsuite/gcc.target/aarch64/target_attr_20.c
> @@ -24,4 +24,4 @@ bar (void)
>      }
>  }
>  
> -/* { dg-final { scan-assembler-not "bl.*__aarch64_cas2_acq_rel" } } */
> +/* { dg-final { scan-assembler-not "bl.*__aarch64_cas2_sync" } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/target_attr_21.c 
> b/gcc/testsuite/gcc.target/aarch64/target_attr_21.c
> index acace4c8f2a..b8e56223b02 100644
> --- a/gcc/testsuite/gcc.target/aarch64/target_attr_21.c
> +++ b/gcc/testsuite/gcc.target/aarch64/target_attr_21.c
> @@ -24,4 +24,4 @@ bar (void)
>      }
>  }
>  
> -/* { dg-final { scan-assembler-times "bl.*__aarch64_cas2_acq_rel" 1 } } */
> +/* { dg-final { scan-assembler-times "bl.*__aarch64_cas2_sync" 1 } } */
> diff --git a/libgcc/config/aarch64/lse.S b/libgcc/config/aarch64/lse.S
> index c353ec2215b..9c29cf08b59 100644
> --- a/libgcc/config/aarch64/lse.S
> +++ b/libgcc/config/aarch64/lse.S
> @@ -87,24 +87,44 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  
> If not, see
>  # define L
>  # define M     0x000000
>  # define N     0x000000
> +# define BARRIER
>  #elif MODEL == 2
>  # define SUFF  _acq
>  # define A     a
>  # define L
>  # define M     0x400000
>  # define N     0x800000
> +# define BARRIER
>  #elif MODEL == 3
>  # define SUFF  _rel
>  # define A
>  # define L     l
>  # define M     0x008000
>  # define N     0x400000
> +# define BARRIER
>  #elif MODEL == 4
>  # define SUFF  _acq_rel
>  # define A     a
>  # define L     l
>  # define M     0x408000
>  # define N     0xc00000
> +# define BARRIER
> +#elif MODEL == 5
> +# define SUFF  _sync
> +#ifdef L_swp
> +/* swp has _acq semantics.  */
> +#  define A    a
> +#  define L
> +#  define M    0x400000
> +#  define N    0x800000
> +#else
> +/* All other _sync functions have _seq semantics.  */
> +#  define A    a
> +#  define L    l
> +#  define M    0x408000
> +#  define N    0xc00000
> +#endif
> +# define BARRIER dmb         ish
>  #else
>  # error
>  #endif
> @@ -127,7 +147,12 @@ see the files COPYING3 and COPYING.RUNTIME respectively. 
>  If not, see
>  #endif
>  
>  #define NAME(BASE)           glue4(__aarch64_, BASE, SIZE, SUFF)
> -#define LDXR                 glue4(ld, A, xr, S)
> +#if MODEL == 5
> +/* Drop A for _sync functions.  */
> +# define LDXR                        glue3(ld, xr, S)
> +#else
> +# define LDXR                        glue4(ld, A, xr, S)
> +#endif
>  #define STXR                 glue4(st, L, xr, S)
>  
>  /* Temporary registers used.  Other than these, only the return value
> @@ -183,10 +208,16 @@ STARTFN NAME(cas)
>       bne             1f
>       STXR            w(tmp1), s(1), [x2]
>       cbnz            w(tmp1), 0b
> -1:   ret
> +1:   BARRIER
> +     ret
>  
>  #else
> -#define LDXP glue3(ld, A, xp)
> +#if MODEL == 5
> +/* Drop A for _sync functions.  */
> +# define LDXP        glue2(ld, xp)
> +#else
> +# define LDXP        glue3(ld, A, xp)
> +#endif
>  #define STXP glue3(st, L, xp)
>  #ifdef HAVE_AS_LSE
>  # define CASP        glue3(casp, A, L)       x0, x1, x2, x3, [x4]
> @@ -205,7 +236,8 @@ STARTFN   NAME(cas)
>       bne             1f
>       STXP            w(tmp2), x2, x3, [x4]
>       cbnz            w(tmp2), 0b
> -1:   ret
> +1:   BARRIER
> +     ret
>  
>  #endif
>  
> @@ -229,6 +261,7 @@ STARTFN   NAME(swp)
>  0:   LDXR            s(0), [x1]
>       STXR            w(tmp1), s(tmp0), [x1]
>       cbnz            w(tmp1), 0b
> +     BARRIER
>       ret
>  
>  ENDFN        NAME(swp)
> @@ -273,6 +306,7 @@ STARTFN   NAME(LDNM)
>       OP              s(tmp1), s(0), s(tmp0)
>       STXR            w(tmp2), s(tmp1), [x1]
>       cbnz            w(tmp2), 0b
> +     BARRIER
>       ret
>  
>  ENDFN        NAME(LDNM)
> diff --git a/libgcc/config/aarch64/t-lse b/libgcc/config/aarch64/t-lse
> index 790cada3315..624daf7eddf 100644
> --- a/libgcc/config/aarch64/t-lse
> +++ b/libgcc/config/aarch64/t-lse
> @@ -18,13 +18,13 @@
>  # along with GCC; see the file COPYING3.  If not see
>  # <http://www.gnu.org/licenses/>.
>  
> -# Compare-and-swap has 5 sizes and 4 memory models.
> +# Compare-and-swap has 5 sizes and 5 memory models.
>  S0 := $(foreach s, 1 2 4 8 16, $(addsuffix _$(s), cas))
> -O0 := $(foreach m, 1 2 3 4, $(addsuffix _$(m)$(objext), $(S0)))
> +O0 := $(foreach m, 1 2 3 4 5, $(addsuffix _$(m)$(objext), $(S0)))
>  
> -# Swap, Load-and-operate have 4 sizes and 4 memory models
> +# Swap, Load-and-operate have 4 sizes and 5 memory models
>  S1 := $(foreach s, 1 2 4 8, $(addsuffix _$(s), swp ldadd ldclr ldeor ldset))
> -O1 := $(foreach m, 1 2 3 4, $(addsuffix _$(m)$(objext), $(S1)))
> +O1 := $(foreach m, 1 2 3 4 5, $(addsuffix _$(m)$(objext), $(S1)))
>  
>  LSE_OBJS := $(O0) $(O1)

Reply via email to