Re: [AArch64] PR105162: emit barrier for __sync and __atomic builtins on CPUs without LSE

2022-05-16 Thread Richard Sandiford via Gcc-patches
"Pop, Sebastian"  writes:
> Please see attached the patch back-ported to branches 12, 11, 10, and 9.
> Tested on aarch64-linux with bootstrap and regression test.
> Ok to commit to the GCC active branches?

OK, thanks.  Only very safe patches are supposed to be going into GCC 9
at this stage, and I guess this one is a bit on the edge.  But I think
it's worth applying anyway because it's fixing a non-determinstic
wrong-code regression.

Richard


Re: [AArch64] PR105162: emit barrier for __sync and __atomic builtins on CPUs without LSE

2022-05-13 Thread Pop, Sebastian via Gcc-patches
Please see attached the patch back-ported to branches 12, 11, 10, and 9.
Tested on aarch64-linux with bootstrap and regression test.
Ok to commit to the GCC active branches?

Thanks,
SebastianFrom bba8d09284f3478f7d542ca4e7812d4c55e25bd4 Mon Sep 17 00:00:00 2001
From: Sebastian Pop 
Date: Mon, 18 Apr 2022 15:13:20 +
Subject: [PATCH] [AArch64] add barriers to ool __sync builtins

2022-05-13  Sebastian Pop  

gcc/
	PR target/105162
	* config/aarch64/aarch64-protos.h (atomic_ool_names): Increase dimension
	of str array.
	* config/aarch64/aarch64.cc (aarch64_atomic_ool_func): Call
	memmodel_from_int and handle MEMMODEL_SYNC_*.
	(DEF0): Add __aarch64_*_sync functions.

gcc/testsuite/
	PR target/105162
	* gcc.target/aarch64/sync-comp-swap-ool.c: New.
	* gcc.target/aarch64/sync-op-acquire-ool.c: New.
	* gcc.target/aarch64/sync-op-full-ool.c: New.
	* gcc.target/aarch64/target_attr_20.c: Update check.
	* gcc.target/aarch64/target_attr_21.c: Same.

libgcc/
	PR target/105162
	* config/aarch64/lse.S: Define BARRIER and handle memory MODEL 5.
	* config/aarch64/t-lse: Add a 5th memory model for _sync functions.
---
 gcc/config/aarch64/aarch64-protos.h   |  2 +-
 gcc/config/aarch64/aarch64.cc | 12 --
 .../gcc.target/aarch64/sync-comp-swap-ool.c   |  6 +++
 .../gcc.target/aarch64/sync-op-acquire-ool.c  |  6 +++
 .../gcc.target/aarch64/sync-op-full-ool.c |  9 
 .../gcc.target/aarch64/target_attr_20.c   |  2 +-
 .../gcc.target/aarch64/target_attr_21.c   |  2 +-
 libgcc/config/aarch64/lse.S   | 42 +--
 libgcc/config/aarch64/t-lse   |  8 ++--
 9 files changed, 75 insertions(+), 14 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sync-comp-swap-ool.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sync-op-acquire-ool.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sync-op-full-ool.c

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 2ac781dff4a..df311812e8d 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -1065,7 +1065,7 @@ bool aarch64_high_bits_all_ones_p (HOST_WIDE_INT);
 
 struct atomic_ool_names
 {
-const char *str[5][4];
+const char *str[5][5];
 };
 
 rtx aarch64_atomic_ool_func(machine_mode mode, rtx model_rtx,
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index f650abbc4ce..f4d2a800f39 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -22678,14 +22678,14 @@ aarch64_emit_unlikely_jump (rtx insn)
   add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
 }
 
-/* We store the names of the various atomic helpers in a 5x4 array.
+/* We store the names of the various atomic helpers in a 5x5 array.
Return the libcall function given MODE, MODEL and NAMES.  */
 
 rtx
 aarch64_atomic_ool_func(machine_mode mode, rtx model_rtx,
 			const atomic_ool_names *names)
 {
-  memmodel model = memmodel_base (INTVAL (model_rtx));
+  memmodel model = memmodel_from_int (INTVAL (model_rtx));
   int mode_idx, model_idx;
 
   switch (mode)
@@ -22725,6 +22725,11 @@ aarch64_atomic_ool_func(machine_mode mode, rtx model_rtx,
 case MEMMODEL_SEQ_CST:
   model_idx = 3;
   break;
+case MEMMODEL_SYNC_ACQUIRE:
+case MEMMODEL_SYNC_RELEASE:
+case MEMMODEL_SYNC_SEQ_CST:
+  model_idx = 4;
+  break;
 default:
   gcc_unreachable ();
 }
@@ -22737,7 +22742,8 @@ aarch64_atomic_ool_func(machine_mode mode, rtx model_rtx,
   { "__aarch64_" #B #N "_relax", \
 "__aarch64_" #B #N "_acq", \
 "__aarch64_" #B #N "_rel", \
-"__aarch64_" #B #N "_acq_rel" }
+"__aarch64_" #B #N "_acq_rel", \
+"__aarch64_" #B #N "_sync" }
 
 #define DEF4(B)  DEF0(B, 1), DEF0(B, 2), DEF0(B, 4), DEF0(B, 8), \
 		 { NULL, NULL, NULL, NULL }
diff --git a/gcc/testsuite/gcc.target/aarch64/sync-comp-swap-ool.c b/gcc/testsuite/gcc.target/aarch64/sync-comp-swap-ool.c
new file mode 100644
index 000..372f4aa8746
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sync-comp-swap-ool.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-march=armv8-a+nolse -O2 -fno-ipa-icf -moutline-atomics" } */
+
+#include "sync-comp-swap.x"
+
+/* { dg-final { scan-assembler-times "bl.*__aarch64_cas4_sync" 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sync-op-acquire-ool.c b/gcc/testsuite/gcc.target/aarch64/sync-op-acquire-ool.c
new file mode 100644
index 000..95d9c56b5e1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sync-op-acquire-ool.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-march=armv8-a+nolse -O2 -moutline-atomics" } */
+
+#include "sync-op-acquire.x"
+
+/* { dg-final { scan-assembler-times "bl.*__aarch64_swp4_sync" 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sync-op-full-ool.c b/gcc/testsuite/gcc.target/aarch64/sync-op-full-ool.c
new file mode 100644
index 000..2f3881d9755
--- 

Re: [AArch64] PR105162: emit barrier for __sync and __atomic builtins on CPUs without LSE

2022-05-13 Thread Wilco Dijkstra via Gcc-patches
Hi Sebastian,

>> Note the patch still needs an appropriate commit message.
>
> Added the following ChangeLog entry to the commit message.
>
> * config/aarch64/aarch64-protos.h (atomic_ool_names): Increase 
>dimension
> of str array.
> * config/aarch64/aarch64.cc (aarch64_atomic_ool_func): Call
> memmodel_from_int and handle MEMMODEL_SYNC_*.
> (DEF0): Add __aarch64_*_sync functions.

When you commit this, please also add a PR target/105162 before the changelog 
so the
commit gets automatically added to the PR. We will need backports as well.

Cheers,
Wilco

Re: [AArch64] PR105162: emit barrier for __sync and __atomic builtins on CPUs without LSE

2022-05-13 Thread Richard Sandiford via Gcc-patches
"Pop, Sebastian via Gcc-patches"  writes:
>> Yes this looks good to me (still needs maintainer approval). 
>
> Thanks again Wilco for your review.
>
>> One minor nitpick,
>> a few of the tests check for __aarch64_cas2 - this should be 
>> __aarch64_cas2_sync.
>
> Fixed in the attached patch.
>
>> Note the patch still needs an appropriate commit message.
>
> Added the following ChangeLog entry to the commit message.
>
> * config/aarch64/aarch64-protos.h (atomic_ool_names): Increase 
> dimension
> of str array.
> * config/aarch64/aarch64.cc (aarch64_atomic_ool_func): Call
> memmodel_from_int and handle MEMMODEL_SYNC_*.
> (DEF0): Add __aarch64_*_sync functions.
>
> testsuite/
> * gcc.target/aarch64/sync-comp-swap-ool.c: New.
> * gcc.target/aarch64/sync-op-acquire-ool.c: New.
> * gcc.target/aarch64/sync-op-full-ool.c: New.
> * gcc.target/aarch64/target_attr_20.c: Update check.
> * gcc.target/aarch64/target_attr_21.c: Same.
>
> libgcc/
> * config/aarch64/lse.S: Define BARRIER and handle memory MODEL 5.
> * config/aarch64/t-lse: Add a 5th memory model for _sync functions.

OK, thanks.

Richard

> From 3b624598035e4e0c1aee89efaae28596a64b3d0d Mon Sep 17 00:00:00 2001
> From: Sebastian Pop 
> Date: Mon, 18 Apr 2022 15:13:20 +
> Subject: [PATCH] [AArch64] add barriers to ool __sync builtins
>
>   * config/aarch64/aarch64-protos.h (atomic_ool_names): Increase dimension
>   of str array.
>   * config/aarch64/aarch64.cc (aarch64_atomic_ool_func): Call
>   memmodel_from_int and handle MEMMODEL_SYNC_*.
>   (DEF0): Add __aarch64_*_sync functions.
>
> testsuite/
>   * gcc.target/aarch64/sync-comp-swap-ool.c: New.
>   * gcc.target/aarch64/sync-op-acquire-ool.c: New.
>   * gcc.target/aarch64/sync-op-full-ool.c: New.
>   * gcc.target/aarch64/target_attr_20.c: Update check.
>   * gcc.target/aarch64/target_attr_21.c: Same.
>
> libgcc/
>   * config/aarch64/lse.S: Define BARRIER and handle memory MODEL 5.
>   * config/aarch64/t-lse: Add a 5th memory model for _sync functions.
> ---
>  gcc/config/aarch64/aarch64-protos.h   |  2 +-
>  gcc/config/aarch64/aarch64.cc | 12 --
>  .../gcc.target/aarch64/sync-comp-swap-ool.c   |  6 +++
>  .../gcc.target/aarch64/sync-op-acquire-ool.c  |  6 +++
>  .../gcc.target/aarch64/sync-op-full-ool.c |  9 
>  .../gcc.target/aarch64/target_attr_20.c   |  2 +-
>  .../gcc.target/aarch64/target_attr_21.c   |  2 +-
>  libgcc/config/aarch64/lse.S   | 42 +--
>  libgcc/config/aarch64/t-lse   |  8 ++--
>  9 files changed, 75 insertions(+), 14 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sync-comp-swap-ool.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sync-op-acquire-ool.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sync-op-full-ool.c
>
> diff --git a/gcc/config/aarch64/aarch64-protos.h 
> b/gcc/config/aarch64/aarch64-protos.h
> index 46bade28ed6..3ad5e77a1af 100644
> --- a/gcc/config/aarch64/aarch64-protos.h
> +++ b/gcc/config/aarch64/aarch64-protos.h
> @@ -1051,7 +1051,7 @@ bool aarch64_high_bits_all_ones_p (HOST_WIDE_INT);
>  
>  struct atomic_ool_names
>  {
> -const char *str[5][4];
> +const char *str[5][5];
>  };
>  
>  rtx aarch64_atomic_ool_func(machine_mode mode, rtx model_rtx,
> diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
> index 18f80499079..3ad11e84aae 100644
> --- a/gcc/config/aarch64/aarch64.cc
> +++ b/gcc/config/aarch64/aarch64.cc
> @@ -22670,14 +22670,14 @@ aarch64_emit_unlikely_jump (rtx insn)
>add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
>  }
>  
> -/* We store the names of the various atomic helpers in a 5x4 array.
> +/* We store the names of the various atomic helpers in a 5x5 array.
> Return the libcall function given MODE, MODEL and NAMES.  */
>  
>  rtx
>  aarch64_atomic_ool_func(machine_mode mode, rtx model_rtx,
>   const atomic_ool_names *names)
>  {
> -  memmodel model = memmodel_base (INTVAL (model_rtx));
> +  memmodel model = memmodel_from_int (INTVAL (model_rtx));
>int mode_idx, model_idx;
>  
>switch (mode)
> @@ -22717,6 +22717,11 @@ aarch64_atomic_ool_func(machine_mode mode, rtx 
> model_rtx,
>  case MEMMODEL_SEQ_CST:
>model_idx = 3;
>break;
> +case MEMMODEL_SYNC_ACQUIRE:
> +case MEMMODEL_SYNC_RELEASE:
> +case MEMMODEL_SYNC_SEQ_CST:
> +  model_idx = 4;
> +  break;
>  default:
>gcc_unreachable ();
>  }
> @@ -22729,7 +22734,8 @@ aarch64_atomic_ool_func(machine_mode mode, rtx 
> model_rtx,
>{ "__aarch64_" #B #N "_relax", \
>  "__aarch64_" #B #N "_acq", \
>  "__aarch64_" #B #N "_rel", \
> -"__aarch64_" #B #N "_acq_rel" }
> +"__aarch64_" #B #N "_acq_rel", \
> +"__aarch64_" #B #N "_sync" }
>  
>  #define DEF4(B)  DEF0(B, 1), DEF0(B, 

Re: [AArch64] PR105162: emit barrier for __sync and __atomic builtins on CPUs without LSE

2022-05-04 Thread Pop, Sebastian via Gcc-patches
> Yes this looks good to me (still needs maintainer approval). 

Thanks again Wilco for your review.

> One minor nitpick,
> a few of the tests check for __aarch64_cas2 - this should be 
> __aarch64_cas2_sync.

Fixed in the attached patch.

> Note the patch still needs an appropriate commit message.

Added the following ChangeLog entry to the commit message.

* config/aarch64/aarch64-protos.h (atomic_ool_names): Increase dimension
of str array.
* config/aarch64/aarch64.cc (aarch64_atomic_ool_func): Call
memmodel_from_int and handle MEMMODEL_SYNC_*.
(DEF0): Add __aarch64_*_sync functions.

testsuite/
* gcc.target/aarch64/sync-comp-swap-ool.c: New.
* gcc.target/aarch64/sync-op-acquire-ool.c: New.
* gcc.target/aarch64/sync-op-full-ool.c: New.
* gcc.target/aarch64/target_attr_20.c: Update check.
* gcc.target/aarch64/target_attr_21.c: Same.

libgcc/
* config/aarch64/lse.S: Define BARRIER and handle memory MODEL 5.
* config/aarch64/t-lse: Add a 5th memory model for _sync functions.
From 3b624598035e4e0c1aee89efaae28596a64b3d0d Mon Sep 17 00:00:00 2001
From: Sebastian Pop 
Date: Mon, 18 Apr 2022 15:13:20 +
Subject: [PATCH] [AArch64] add barriers to ool __sync builtins

	* config/aarch64/aarch64-protos.h (atomic_ool_names): Increase dimension
	of str array.
	* config/aarch64/aarch64.cc (aarch64_atomic_ool_func): Call
	memmodel_from_int and handle MEMMODEL_SYNC_*.
	(DEF0): Add __aarch64_*_sync functions.

testsuite/
	* gcc.target/aarch64/sync-comp-swap-ool.c: New.
	* gcc.target/aarch64/sync-op-acquire-ool.c: New.
	* gcc.target/aarch64/sync-op-full-ool.c: New.
	* gcc.target/aarch64/target_attr_20.c: Update check.
	* gcc.target/aarch64/target_attr_21.c: Same.

libgcc/
	* config/aarch64/lse.S: Define BARRIER and handle memory MODEL 5.
	* config/aarch64/t-lse: Add a 5th memory model for _sync functions.
---
 gcc/config/aarch64/aarch64-protos.h   |  2 +-
 gcc/config/aarch64/aarch64.cc | 12 --
 .../gcc.target/aarch64/sync-comp-swap-ool.c   |  6 +++
 .../gcc.target/aarch64/sync-op-acquire-ool.c  |  6 +++
 .../gcc.target/aarch64/sync-op-full-ool.c |  9 
 .../gcc.target/aarch64/target_attr_20.c   |  2 +-
 .../gcc.target/aarch64/target_attr_21.c   |  2 +-
 libgcc/config/aarch64/lse.S   | 42 +--
 libgcc/config/aarch64/t-lse   |  8 ++--
 9 files changed, 75 insertions(+), 14 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sync-comp-swap-ool.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sync-op-acquire-ool.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sync-op-full-ool.c

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 46bade28ed6..3ad5e77a1af 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -1051,7 +1051,7 @@ bool aarch64_high_bits_all_ones_p (HOST_WIDE_INT);
 
 struct atomic_ool_names
 {
-const char *str[5][4];
+const char *str[5][5];
 };
 
 rtx aarch64_atomic_ool_func(machine_mode mode, rtx model_rtx,
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 18f80499079..3ad11e84aae 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -22670,14 +22670,14 @@ aarch64_emit_unlikely_jump (rtx insn)
   add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
 }
 
-/* We store the names of the various atomic helpers in a 5x4 array.
+/* We store the names of the various atomic helpers in a 5x5 array.
Return the libcall function given MODE, MODEL and NAMES.  */
 
 rtx
 aarch64_atomic_ool_func(machine_mode mode, rtx model_rtx,
 			const atomic_ool_names *names)
 {
-  memmodel model = memmodel_base (INTVAL (model_rtx));
+  memmodel model = memmodel_from_int (INTVAL (model_rtx));
   int mode_idx, model_idx;
 
   switch (mode)
@@ -22717,6 +22717,11 @@ aarch64_atomic_ool_func(machine_mode mode, rtx model_rtx,
 case MEMMODEL_SEQ_CST:
   model_idx = 3;
   break;
+case MEMMODEL_SYNC_ACQUIRE:
+case MEMMODEL_SYNC_RELEASE:
+case MEMMODEL_SYNC_SEQ_CST:
+  model_idx = 4;
+  break;
 default:
   gcc_unreachable ();
 }
@@ -22729,7 +22734,8 @@ aarch64_atomic_ool_func(machine_mode mode, rtx model_rtx,
   { "__aarch64_" #B #N "_relax", \
 "__aarch64_" #B #N "_acq", \
 "__aarch64_" #B #N "_rel", \
-"__aarch64_" #B #N "_acq_rel" }
+"__aarch64_" #B #N "_acq_rel", \
+"__aarch64_" #B #N "_sync" }
 
 #define DEF4(B)  DEF0(B, 1), DEF0(B, 2), DEF0(B, 4), DEF0(B, 8), \
 		 { NULL, NULL, NULL, NULL }
diff --git a/gcc/testsuite/gcc.target/aarch64/sync-comp-swap-ool.c b/gcc/testsuite/gcc.target/aarch64/sync-comp-swap-ool.c
new file mode 100644
index 000..372f4aa8746
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sync-comp-swap-ool.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options 

Re: [AArch64] PR105162: emit barrier for __sync and __atomic builtins on CPUs without LSE

2022-05-03 Thread Wilco Dijkstra via Gcc-patches
Hi Sebastian,

> Please find attached the patch amended following your recommendations.
> The number of new functions for _sync is reduced by 3x.
> I tested the patch on Graviton2 aarch64-linux.
> I also checked by hand that the outline functions in libgcc look similar to 
> what GCC produces for the inline version.

Yes this looks good to me (still needs maintainer approval). One minor nitpick,
a few of the tests check for __aarch64_cas2 - this should be 
__aarch64_cas2_sync.
Note the patch still needs an appropriate commit message.

Cheers,
Wilco

Re: [AArch64] PR105162: emit barrier for __sync and __atomic builtins on CPUs without LSE

2022-04-25 Thread Pop, Sebastian via Gcc-patches
Hi Wilco,

Thanks for your review.
Please find attached the patch amended following your recommendations.
The number of new functions for _sync is reduced by 3x.
I tested the patch on Graviton2 aarch64-linux.
I also checked by hand that the outline functions in libgcc look similar to 
what GCC produces for the inline version.

Thanks,
Sebastian

From: Wilco Dijkstra 
Sent: Tuesday, April 19, 2022 7:51 AM
To: Pop, Sebastian; gcc-patches@gcc.gnu.org
Cc: Kyrylo Tkachov
Subject: RE: [EXTERNAL] [AArch64] PR105162: emit barrier for __sync and 
__atomic builtins on CPUs without LSE

CAUTION: This email originated from outside of the organization. Do not click 
links or open attachments unless you can confirm the sender and know the 
content is safe.



Hi Sebastian,

> Wilco pointed out in https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105162#c7​ 
> that
> "Only __sync needs the extra full barrier, but __atomic does not."
> The attached patch does that by adding out-of-line functions for 
> MEMMODEL_SYNC_*.
> Those new functions contain a barrier on the path without LSE instructions.

Yes, adding _sync versions of the outline functions is the correct approach. 
However
there is no need to have separate _acq/_rel/_seq variants for every function 
since all
but one are _seq. Also we should ensure we generate the same sequence as the 
inlined
versions so that they are consistent. This means ensuring the LDXR macro 
ignores the
'A' for the _sync variants and the swp function switches to acquire semantics.

Cheers,
Wilco
From 4e90111e507a611d3daa0f71fc17c6d5cc3e203e Mon Sep 17 00:00:00 2001
From: Sebastian Pop 
Date: Mon, 18 Apr 2022 15:13:20 +
Subject: [PATCH] [AArch64] add barriers to ool __sync builtins

---
 gcc/config/aarch64/aarch64-protos.h   |  2 +-
 gcc/config/aarch64/aarch64.cc | 12 --
 .../gcc.target/aarch64/sync-comp-swap-ool.c   |  6 +++
 .../gcc.target/aarch64/sync-op-acquire-ool.c  |  6 +++
 .../gcc.target/aarch64/sync-op-full-ool.c |  9 
 .../gcc.target/aarch64/target_attr_20.c   |  2 +-
 .../gcc.target/aarch64/target_attr_21.c   |  2 +-
 libgcc/config/aarch64/lse.S   | 42 +--
 libgcc/config/aarch64/t-lse   |  8 ++--
 9 files changed, 75 insertions(+), 14 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sync-comp-swap-ool.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sync-op-acquire-ool.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sync-op-full-ool.c

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 46bade28ed6..3ad5e77a1af 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -1051,7 +1051,7 @@ bool aarch64_high_bits_all_ones_p (HOST_WIDE_INT);
 
 struct atomic_ool_names
 {
-const char *str[5][4];
+const char *str[5][5];
 };
 
 rtx aarch64_atomic_ool_func(machine_mode mode, rtx model_rtx,
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 18f80499079..3ad11e84aae 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -22670,14 +22670,14 @@ aarch64_emit_unlikely_jump (rtx insn)
   add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
 }
 
-/* We store the names of the various atomic helpers in a 5x4 array.
+/* We store the names of the various atomic helpers in a 5x5 array.
Return the libcall function given MODE, MODEL and NAMES.  */
 
 rtx
 aarch64_atomic_ool_func(machine_mode mode, rtx model_rtx,
 			const atomic_ool_names *names)
 {
-  memmodel model = memmodel_base (INTVAL (model_rtx));
+  memmodel model = memmodel_from_int (INTVAL (model_rtx));
   int mode_idx, model_idx;
 
   switch (mode)
@@ -22717,6 +22717,11 @@ aarch64_atomic_ool_func(machine_mode mode, rtx model_rtx,
 case MEMMODEL_SEQ_CST:
   model_idx = 3;
   break;
+case MEMMODEL_SYNC_ACQUIRE:
+case MEMMODEL_SYNC_RELEASE:
+case MEMMODEL_SYNC_SEQ_CST:
+  model_idx = 4;
+  break;
 default:
   gcc_unreachable ();
 }
@@ -22729,7 +22734,8 @@ aarch64_atomic_ool_func(machine_mode mode, rtx model_rtx,
   { "__aarch64_" #B #N "_relax", \
 "__aarch64_" #B #N "_acq", \
 "__aarch64_" #B #N "_rel", \
-"__aarch64_" #B #N "_acq_rel" }
+"__aarch64_" #B #N "_acq_rel", \
+"__aarch64_" #B #N "_sync" }
 
 #define DEF4(B)  DEF0(B, 1), DEF0(B, 2), DEF0(B, 4), DEF0(B, 8), \
 		 { NULL, NULL, NULL, NULL }
diff --git a/gcc/testsuite/gcc.target/aarch64/sync-comp-swap-ool.c b/gcc/testsuite/gcc.target/aarch64/sync-comp-swap-ool.c
new file mode 100644
index 000..372f4aa8746
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sync-comp-swap-ool.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-march=armv8-a+nolse -O2 -fno-ipa-icf -moutline-atomics" } */
+
+#include "sync-comp-swap.x"
+
+/* { dg-final { scan-assembler-times "bl.*__aarch64_cas4_sync" 1 } } 

Re: [AArch64] PR105162: emit barrier for __sync and __atomic builtins on CPUs without LSE

2022-04-19 Thread Wilco Dijkstra via Gcc-patches
Hi Sebastian,

> Wilco pointed out in https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105162#c7​ 
> that
> "Only __sync needs the extra full barrier, but __atomic does not."
> The attached patch does that by adding out-of-line functions for 
> MEMMODEL_SYNC_*.
> Those new functions contain a barrier on the path without LSE instructions.

Yes, adding _sync versions of the outline functions is the correct approach. 
However
there is no need to have separate _acq/_rel/_seq variants for every function 
since all
but one are _seq. Also we should ensure we generate the same sequence as the 
inlined
versions so that they are consistent. This means ensuring the LDXR macro 
ignores the
'A' for the _sync variants and the swp function switches to acquire semantics.

Cheers,
Wilco

Re: [AArch64] PR105162: emit barrier for __sync and __atomic builtins on CPUs without LSE

2022-04-18 Thread Pop, Sebastian via Gcc-patches
Hi,


Wilco pointed out in https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105162#c7? 
that

"Only __sync needs the extra full barrier, but __atomic does not."

The attached patch does that by adding out-of-line functions for 
MEMMODEL_SYNC_*.

Those new functions contain a barrier on the path without LSE instructions.


I tested the patch on aarch64-linux with bootstrap and make check.


Sebastian



From: Pop, Sebastian
Sent: Thursday, April 7, 2022 3:15 PM
To: gcc-patches@gcc.gnu.org
Cc: Kyrylo Tkachov
Subject: [AArch64] PR105162: emit barrier for __sync and __atomic builtins on 
CPUs without LSE


Hi,


With -moutline-atomics gcc stops generating a barrier for __sync builtins: 
https://gcc.gnu.org/PR105162

This is a problem on CPUs without LSE instructions where the ld/st exclusives 
do not guarantee a full barrier.

The attached patch adds the barrier to the outline-atomics functions on the 
path without LSE instructions.

In consequence, under -moutline-atomics __atomic and __sync builtins now behave 
the same with and without LSE instructions.

To complete the change, the second patch makes gcc emit the barrier for 
__atomic builtins as well, i.e., independently of is_mm_sync().


Sebastian
From b45842c209ddcb560d6de477ced444f4d948ea76 Mon Sep 17 00:00:00 2001
From: Sebastian Pop 
Date: Mon, 18 Apr 2022 15:13:20 +
Subject: [PATCH] [AArch64] add barriers to ool __sync builtins

---
 gcc/config/aarch64/aarch64-protos.h   |  2 +-
 gcc/config/aarch64/aarch64.cc | 18 --
 .../gcc.target/aarch64/sync-comp-swap-ool.c   |  6 
 .../gcc.target/aarch64/sync-op-acquire-ool.c  |  6 
 .../gcc.target/aarch64/sync-op-full-ool.c |  9 +
 .../gcc.target/aarch64/target_attr_20.c   |  2 +-
 .../gcc.target/aarch64/target_attr_21.c   |  2 +-
 libgcc/config/aarch64/lse.S   | 33 +--
 libgcc/config/aarch64/t-lse   |  8 ++---
 9 files changed, 74 insertions(+), 12 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sync-comp-swap-ool.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sync-op-acquire-ool.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sync-op-full-ool.c

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 46bade28ed6..0b325e1e32b 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -1051,7 +1051,7 @@ bool aarch64_high_bits_all_ones_p (HOST_WIDE_INT);
 
 struct atomic_ool_names
 {
-const char *str[5][4];
+const char *str[5][7];
 };
 
 rtx aarch64_atomic_ool_func(machine_mode mode, rtx model_rtx,
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 18f80499079..3b02ee379f5 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -22670,14 +22670,14 @@ aarch64_emit_unlikely_jump (rtx insn)
   add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
 }
 
-/* We store the names of the various atomic helpers in a 5x4 array.
+/* We store the names of the various atomic helpers in a 5x7 array.
Return the libcall function given MODE, MODEL and NAMES.  */
 
 rtx
 aarch64_atomic_ool_func(machine_mode mode, rtx model_rtx,
 			const atomic_ool_names *names)
 {
-  memmodel model = memmodel_base (INTVAL (model_rtx));
+  memmodel model = memmodel_from_int (INTVAL (model_rtx));
   int mode_idx, model_idx;
 
   switch (mode)
@@ -22717,6 +22717,15 @@ aarch64_atomic_ool_func(machine_mode mode, rtx model_rtx,
 case MEMMODEL_SEQ_CST:
   model_idx = 3;
   break;
+case MEMMODEL_SYNC_ACQUIRE:
+  model_idx = 4;
+  break;
+case MEMMODEL_SYNC_RELEASE:
+  model_idx = 5;
+  break;
+case MEMMODEL_SYNC_SEQ_CST:
+  model_idx = 6;
+  break;
 default:
   gcc_unreachable ();
 }
@@ -22729,7 +22738,10 @@ aarch64_atomic_ool_func(machine_mode mode, rtx model_rtx,
   { "__aarch64_" #B #N "_relax", \
 "__aarch64_" #B #N "_acq", \
 "__aarch64_" #B #N "_rel", \
-"__aarch64_" #B #N "_acq_rel" }
+"__aarch64_" #B #N "_acq_rel", \
+"__aarch64_" #B #N "_sync_acq", \
+"__aarch64_" #B #N "_sync_rel", \
+"__aarch64_" #B #N "_sync_seq" }
 
 #define DEF4(B)  DEF0(B, 1), DEF0(B, 2), DEF0(B, 4), DEF0(B, 8), \
 		 { NULL, NULL, NULL, NULL }
diff --git a/gcc/testsuite/gcc.target/aarch64/sync-comp-swap-ool.c b/gcc/testsuite/gcc.target/aarch64/sync-comp-swap-ool.c
new file mode 100644
index 000..cb28db21d17
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sync-comp-swap-ool.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-march=armv8-a+nolse -O2 -fno-ipa-icf -moutline-atomics" } */
+
+#include "sync-comp-swap.x"
+
+/* { dg-final { scan-assembler-times "bl.*__aarch64_cas4_sync_seq" 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sync-op-acquire-ool.c 

[AArch64] PR105162: emit barrier for __sync and __atomic builtins on CPUs without LSE

2022-04-07 Thread Pop, Sebastian via Gcc-patches
Hi,


With -moutline-atomics gcc stops generating a barrier for __sync builtins: 
https://gcc.gnu.org/PR105162

This is a problem on CPUs without LSE instructions where the ld/st exclusives 
do not guarantee a full barrier.

The attached patch adds the barrier to the outline-atomics functions on the 
path without LSE instructions.

In consequence, under -moutline-atomics __atomic and __sync builtins now behave 
the same with and without LSE instructions.

To complete the change, the second patch makes gcc emit the barrier for 
__atomic builtins as well, i.e., independently of is_mm_sync().


Sebastian
From b1ffa7d737427dc9414cb0c315f08b7c84ef647b Mon Sep 17 00:00:00 2001
From: Sebastian Pop 
Date: Wed, 6 Apr 2022 21:42:11 +
Subject: [PATCH] [AArch64] add barrier to no LSE path in outline-atomics
 functions

---
 libgcc/config/aarch64/lse.S | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libgcc/config/aarch64/lse.S b/libgcc/config/aarch64/lse.S
index c353ec2215b..ac77c68e300 100644
--- a/libgcc/config/aarch64/lse.S
+++ b/libgcc/config/aarch64/lse.S
@@ -229,6 +229,7 @@ STARTFN	NAME(swp)
 0:	LDXR		s(0), [x1]
 	STXR		w(tmp1), s(tmp0), [x1]
 	cbnz		w(tmp1), 0b
+dmb ish
 	ret
 
 ENDFN	NAME(swp)
@@ -273,6 +274,7 @@ STARTFN	NAME(LDNM)
 	OP		s(tmp1), s(0), s(tmp0)
 	STXR		w(tmp2), s(tmp1), [x1]
 	cbnz		w(tmp2), 0b
+dmb ish
 	ret
 
 ENDFN	NAME(LDNM)
-- 
2.25.1

From 68c07f95157057f0167723b182f0ccffdac8a17e Mon Sep 17 00:00:00 2001
From: Sebastian Pop 
Date: Thu, 7 Apr 2022 19:18:57 +
Subject: [PATCH 2/2] [AArch64] emit a barrier for __atomic builtins

---
 gcc/config/aarch64/aarch64.cc | 15 +++
 1 file changed, 3 insertions(+), 12 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 18f80499079..be1b8d22c6a 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -22931,9 +22931,7 @@ aarch64_split_compare_and_swap (rtx operands[])
   if (strong_zero_p)
 aarch64_gen_compare_reg (NE, rval, const0_rtx);
 
-  /* Emit any final barrier needed for a __sync operation.  */
-  if (is_mm_sync (model))
-aarch64_emit_post_barrier (model);
+  aarch64_emit_post_barrier (model);
 }
 
 /* Split an atomic operation.  */
@@ -22948,7 +22946,6 @@ aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
   machine_mode mode = GET_MODE (mem);
   machine_mode wmode = (mode == DImode ? DImode : SImode);
   const enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
-  const bool is_sync = is_mm_sync (model);
   rtx_code_label *label;
   rtx x;
 
@@ -22966,11 +22963,7 @@ aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
 
   /* The initial load can be relaxed for a __sync operation since a final
  barrier will be emitted to stop code hoisting.  */
- if (is_sync)
-aarch64_emit_load_exclusive (mode, old_out, mem,
- GEN_INT (MEMMODEL_RELAXED));
-  else
-aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
+  aarch64_emit_load_exclusive (mode, old_out, mem, GEN_INT (MEMMODEL_RELAXED));
 
   switch (code)
 {
@@ -23016,9 +23009,7 @@ aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
 			gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
   aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
 
-  /* Emit any final barrier needed for a __sync operation.  */
-  if (is_sync)
-aarch64_emit_post_barrier (model);
+  aarch64_emit_post_barrier (model);
 }
 
 static void
-- 
2.25.1