Re: [AArch64][PATCH 4/5] Use atomic load-operate instructions for fetch-update patterns.

2015-09-21 Thread Matthew Wahab

On 18/09/15 09:55, James Greenhalgh wrote:

On Thu, Sep 17, 2015 at 05:47:43PM +0100, Matthew Wahab wrote:

Hello,

ARMv8.1 adds atomic swap and atomic load-operate instructions with
optional memory ordering specifiers. This patch uses the ARMv8.1 atomic
load-operate instructions to implement the atomic_fetch_<op>
patterns. This patch also updates the implementation of the atomic_<op>
patterns, which are treated as versions of the atomic_fetch_<op> which
discard the loaded data.
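
For context, a minimal sketch (an illustration only, not part of the patch) of
the kind of source that reaches these patterns: the first function needs the
value loaded from memory and so goes through the atomic_fetch_<op> path, while
the second discards it and can use the no-fetch atomic_<op> form. With
-march=armv8.1-a+lse the expectation is a single load-operate instruction in
each case; without LSE the existing load/store-exclusive loop is generated.

/* Illustration only (not from the patch): builtins that expand through
   the fetch-update patterns discussed above.  */
#include <stdint.h>

uint64_t
fetch_then_add (uint64_t *p, uint64_t v)
{
  /* Old value is needed, so this uses an atomic_fetch_<op> pattern
     (expected to become a single LDADD with +lse).  */
  return __atomic_fetch_add (p, v, __ATOMIC_SEQ_CST);
}

void
set_flag_bits (uint64_t *p, uint64_t mask)
{
  /* Result is discarded, so the no-fetch atomic_<op> form can be used
     (expected to become a single LDSET with +lse).  */
  (void) __atomic_or_fetch (p, mask, __ATOMIC_SEQ_CST);
}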


[..]


Ok for trunk?


Some comments in line below.



diff --git a/gcc/config/aarch64/aarch64-protos.h 
b/gcc/config/aarch64/aarch64-protos.h
index eba4c76..76ebd6f 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -378,6 +378,8 @@ rtx aarch64_load_tp (rtx);
  void aarch64_expand_compare_and_swap (rtx op[]);
  void aarch64_split_compare_and_swap (rtx op[]);
  void aarch64_gen_atomic_cas (rtx, rtx, rtx, rtx, rtx);
+
+bool aarch64_atomic_ldop_supported_p (enum rtx_code);
  void aarch64_gen_atomic_ldop (enum rtx_code, rtx, rtx, rtx, rtx);
  void aarch64_split_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx, rtx);

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index dc05c6e..33f9ef3 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -11064,6 +11064,33 @@ aarch64_expand_compare_and_swap (rtx operands[])
emit_insn (gen_rtx_SET (bval, x));
  }

+/* Test whether the target supports using a atomic load-operate instruction.
+   CODE is the operation and AFTER is TRUE if the data in memory after the
+   operation should be returned and FALSE if the data before the operation
+   should be returned.  Returns FALSE if the operation isn't supported by the
+   architecture.
+  */


Stray newline, leave the */ on the line before.


Fixed this.


+
+bool
+aarch64_atomic_ldop_supported_p (enum rtx_code code)
+{
+  if (!TARGET_LSE)
+return false;
+
+  switch (code)
+{
+case SET:
+case AND:
+case IOR:
+case XOR:
+case MINUS:
+case PLUS:
+  return true;
+default:
+  return false;
+}
+}
+
  /* Emit a barrier, that is appropriate for memory model MODEL, at the end of a
 sequence implementing an atomic operation.  */

@@ -11206,26 +11233,169 @@ aarch64_emit_atomic_swap (machine_mode mode, rtx 
dst, rtx value,
emit_insn (gen (dst, mem, value, model));
  }

-/* Emit an atomic operation where the architecture supports it.  */
+/* Operations supported by aarch64_emit_atomic_load_op.  */
+
+enum aarch64_atomic_load_op_code
+{
+  AARCH64_LDOP_PLUS,   /* A + B  */
+  AARCH64_LDOP_XOR,    /* A ^ B  */
+  AARCH64_LDOP_OR,     /* A | B  */
+  AARCH64_LDOP_BIC     /* A & ~B  */
+};


I have a small preference to calling these the same name as the
instructions they will generate, so AARCH64_LDOP_ADD, AARCH64_LDOP_EOR,
AARCH64_LDOP_SET and AARCH64_LDOP_CLR, but I'm happy for you to leave it
this way if you prefer.



I prefer to keep them related to the operation being implemented rather than
how it is implemented, so I've left them as they are.
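
As a stand-alone illustration (a model only, not the GCC implementation) of the
mapping behind these names: SET, PLUS, IOR and XOR map directly onto a
load-operate, while MINUS and AND are handled by negating or complementing the
source operand first and then using the PLUS and BIC forms, as described in the
covering note.

/* Stand-alone model (illustration only) of the four load-operate forms
   named above: the operation stores op(old, src) back to memory and
   returns the old value.  */
#include <assert.h>
#include <stdint.h>

enum ldop { LDOP_PLUS, LDOP_XOR, LDOP_OR, LDOP_BIC };

static uint64_t
ldop_model (enum ldop op, uint64_t *mem, uint64_t src)
{
  uint64_t old = *mem;
  switch (op)
    {
    case LDOP_PLUS: *mem = old + src; break;   /* ldadd */
    case LDOP_XOR:  *mem = old ^ src; break;   /* ldeor */
    case LDOP_OR:   *mem = old | src; break;   /* ldset */
    case LDOP_BIC:  *mem = old & ~src; break;  /* ldclr */
    }
  return old;
}

/* A fetch-and-AND built on the BIC form by complementing the mask first,
   mirroring the NOT + LDCLR sequence in the covering note.  */
static uint64_t
fetch_and_and (uint64_t *mem, uint64_t mask)
{
  return ldop_model (LDOP_BIC, mem, ~mask);
}

int
main (void)
{
  uint64_t v = 0xff00;
  assert (fetch_and_and (&v, 0x0f00) == 0xff00 && v == 0x0f00);
  return 0;
}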




-(define_insn_and_split "atomic_<atomic_optab><mode>"
+(define_expand "atomic_<atomic_optab><mode>"
+ [(set (match_operand:ALLI 0 "aarch64_sync_memory_operand" "")
+   (unspec_volatile:ALLI
+[(atomic_op:ALLI (match_dup 0)
+  (match_operand:ALLI 1 "" ""))
+ (match_operand:SI 2 "const_int_operand")]
+UNSPECV_ATOMIC_OP))
+  (clobber (reg:CC CC_REGNUM))]


This is not needed for the LSE forms of these instructions and may result
in less optimal code generation. On the other hand, that will only be in
a corner case and this is only a define_expand. Because of that, it would
be clearer to a reader if you dropped the detailed description of this
in RTL (which is never used) and rewrote it using just the uses of the
operands, as so:


+(define_expand "atomic_"
+ [(match_operand:ALLI 0 "aarch64_sync_memory_operand" "")
+  (match_operand:ALLI 1 "" "")
+  (match_operand:SI 2 "const_int_operand")]




Switched the new expanders in this and the other patches to the simpler form.




+(define_insn_and_split "aarch64_atomic_"
+ [(set (match_operand:ALLI 0 "aarch64_sync_memory_operand" "+Q")
+   (unspec_volatile:ALLI
+[(atomic_op:ALLI (match_dup 0)
+  (match_operand:ALLI 1 "" "r"))
+ (match_operand:SI 2 "const_int_operand")]
+UNSPECV_ATOMIC_OP))
+  (clobber (reg:CC CC_REGNUM))
+  (clobber (match_scratch:ALLI 3 "="))
+  (clobber (match_scratch:SI 4 "="))]
+  ""


TARGET_LSE ?


It's not needed here because this pattern is always available.


+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  {
+    aarch64_split_atomic_op (<CODE>, NULL, operands[3], operands[0],
+			     operands[1], operands[2], operands[4]);
+DONE;
+  }
+)
+
+(define_insn_and_split "aarch64_atomic__lse"
[(set (match_operand:ALLI 0 "aarch64_sync_memory_operand" "+Q")
  (unspec_volatile:ALLI
[(atomic_op:ALLI (match_dup 0)
(match_operand:ALLI 1 "" "r"))
-   (match_operand:SI 2 

Re: [AArch64][PATCH 4/5] Use atomic load-operate instructions for fetch-update patterns.

2015-09-21 Thread James Greenhalgh
On Mon, Sep 21, 2015 at 12:31:21PM +0100, Matthew Wahab wrote:
> I've also removed the CC clobber from the _lse patterns; it was overly
> cautious.
> 
> Update patch attached, Ok for trunk?
> Matthew

OK.

Thanks,
James

> 
> gcc/
> 2015-09-21  Matthew Wahab  
> 
>   * config/aarch64/aarch64-protos.h
>   (aarch64_atomic_ldop_supported_p): Declare.
>   * config/aarch64/aarch64.c (aarch64_atomic_ldop_supported_p): New.
>   (enum aarch64_atomic_load_op_code): New.
>   (aarch64_emit_atomic_load_op): New.
>   (aarch64_gen_atomic_ldop): Update to support load-operate
>   patterns.
>   * config/aarch64/atomics.md (atomic_<atomic_optab><mode>): Change
>   to an expander.
>   (aarch64_atomic_<atomic_optab><mode>): New.
>   (aarch64_atomic_<atomic_optab><mode>_lse): New.
>   (atomic_fetch_<atomic_optab><mode>): Change to an expander.
>   (aarch64_atomic_fetch_<atomic_optab><mode>): New.
>   (aarch64_atomic_fetch_<atomic_optab><mode>_lse): New.
> 
> gcc/testsuite/
> 2015-09-21  Matthew Wahab  
> 
>   * gcc.target/aarch64/atomic-inst-ldadd.c: New.
>   * gcc.target/aarch64/atomic-inst-ldlogic.c: New.
> 

> From 5d1bc64d7e9509374076e4c4ff5a285d4b073f24 Mon Sep 17 00:00:00 2001
> From: Matthew Wahab 
> Date: Fri, 7 Aug 2015 17:10:42 +0100
> Subject: [PATCH 4/5] Use atomic instructions for fetch-update patterns.
> 
> Change-Id: I39759f02e61039067ccaabfd52039e4804eddf2f
> ---
>  gcc/config/aarch64/aarch64-protos.h|   2 +
>  gcc/config/aarch64/aarch64.c   | 175 
> -
>  gcc/config/aarch64/atomics.md  | 101 ++--
>  .../gcc.target/aarch64/atomic-inst-ldadd.c |  58 +++
>  .../gcc.target/aarch64/atomic-inst-ldlogic.c   | 109 +
>  5 files changed, 433 insertions(+), 12 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/atomic-inst-ldadd.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/atomic-inst-ldlogic.c
> 
> diff --git a/gcc/config/aarch64/aarch64-protos.h 
> b/gcc/config/aarch64/aarch64-protos.h
> index eba4c76..76ebd6f 100644
> --- a/gcc/config/aarch64/aarch64-protos.h
> +++ b/gcc/config/aarch64/aarch64-protos.h
> @@ -378,6 +378,8 @@ rtx aarch64_load_tp (rtx);
>  void aarch64_expand_compare_and_swap (rtx op[]);
>  void aarch64_split_compare_and_swap (rtx op[]);
>  void aarch64_gen_atomic_cas (rtx, rtx, rtx, rtx, rtx);
> +
> +bool aarch64_atomic_ldop_supported_p (enum rtx_code);
>  void aarch64_gen_atomic_ldop (enum rtx_code, rtx, rtx, rtx, rtx);
>  void aarch64_split_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx, rtx);
>  
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index dc05c6e..3a1b434 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -11064,6 +11064,32 @@ aarch64_expand_compare_and_swap (rtx operands[])
>emit_insn (gen_rtx_SET (bval, x));
>  }
>  
> +/* Test whether the target supports using a atomic load-operate instruction.
> +   CODE is the operation and AFTER is TRUE if the data in memory after the
> +   operation should be returned and FALSE if the data before the operation
> +   should be returned.  Returns FALSE if the operation isn't supported by the
> +   architecture.  */
> +
> +bool
> +aarch64_atomic_ldop_supported_p (enum rtx_code code)
> +{
> +  if (!TARGET_LSE)
> +return false;
> +
> +  switch (code)
> +{
> +case SET:
> +case AND:
> +case IOR:
> +case XOR:
> +case MINUS:
> +case PLUS:
> +  return true;
> +default:
> +  return false;
> +}
> +}
> +
>  /* Emit a barrier, that is appropriate for memory model MODEL, at the end of 
> a
> sequence implementing an atomic operation.  */
>  
> @@ -11206,26 +11232,169 @@ aarch64_emit_atomic_swap (machine_mode mode, rtx 
> dst, rtx value,
>emit_insn (gen (dst, mem, value, model));
>  }
>  
> -/* Emit an atomic operation where the architecture supports it.  */
> +/* Operations supported by aarch64_emit_atomic_load_op.  */
> +
> +enum aarch64_atomic_load_op_code
> +{
> +  AARCH64_LDOP_PLUS, /* A + B  */
> +  AARCH64_LDOP_XOR,  /* A ^ B  */
> +  AARCH64_LDOP_OR,   /* A | B  */
> +  AARCH64_LDOP_BIC   /* A & ~B  */
> +};
> +
> +/* Emit an atomic load-operate.  */
> +
> +static void
> +aarch64_emit_atomic_load_op (enum aarch64_atomic_load_op_code code,
> +  machine_mode mode, rtx dst, rtx src,
> +  rtx mem, rtx model)
> +{
> +  typedef rtx (*aarch64_atomic_load_op_fn) (rtx, rtx, rtx, rtx);
> +  const aarch64_atomic_load_op_fn plus[] =
> +  {
> +gen_aarch64_atomic_loadaddqi,
> +gen_aarch64_atomic_loadaddhi,
> +gen_aarch64_atomic_loadaddsi,
> +gen_aarch64_atomic_loadadddi
> +  };
> +  const aarch64_atomic_load_op_fn eor[] =
> +  {
> +gen_aarch64_atomic_loadeorqi,
> +gen_aarch64_atomic_loadeorhi,
> +gen_aarch64_atomic_loadeorsi,
> +gen_aarch64_atomic_loadeordi
> +  };
> +  const aarch64_atomic_load_op_fn ior[] =
> +  {

Re: [AArch64][PATCH 4/5] Use atomic load-operate instructions for fetch-update patterns.

2015-09-18 Thread James Greenhalgh
On Thu, Sep 17, 2015 at 05:47:43PM +0100, Matthew Wahab wrote:
> Hello,
> 
> ARMv8.1 adds atomic swap and atomic load-operate instructions with
> optional memory ordering specifiers. This patch uses the ARMv8.1 atomic
> load-operate instructions to implement the atomic_fetch_<op>
> patterns. This patch also updates the implementation of the atomic_<op>
> patterns, which are treated as versions of the atomic_fetch_<op> which
> discard the loaded data.
> 
> The general form of the code generated for an atomic_fetch_<op>, with
> destination D, source S, memory address A and memory order MO, depends
> on whether the operation is directly supported by the instruction. If
> <op> is one of PLUS, IOR or XOR, the code generated is:
> 
>  ld<opc><order><size> S, D, [A]
> 
> where
>    <opc> is one of {add, set, eor}
>    <order> is one of {'', 'a', 'l', 'al'} depending on memory order MO.
>    <size> is one of {'', 'b', 'h'} depending on the data size.
> 
> If <op> is SUB, the code generated, with scratch register r, is:
> 
>  neg r, S
>  ldadd r, D, [A]
> 
> If <op> is AND, the code generated is:
>  not r, S
>  ldclr r, D, [A]
> 
> Any operation not in {PLUS, IOR, XOR, SUB, AND} is passed to the
> existing aarch64_split_atomic_op function, to implement the operation
> using sequences built with the ARMv8 load-exclusive/store-exclusive
> instructions.
> 
> Tested the series for aarch64-none-linux-gnu with native bootstrap and
> make check. Also tested for aarch64-none-elf with cross-compiled
> check-gcc on an ARMv8.1 emulator with +lse enabled by default.
> 
> Ok for trunk?

Some comments in line below.

> From c4b8eb6d2ca62c57f4a011e06335b918f603ad2a Mon Sep 17 00:00:00 2001
> From: Matthew Wahab 
> Date: Fri, 7 Aug 2015 17:10:42 +0100
> Subject: [PATCH 4/5] Use atomic instructions for fetch-update patterns.
> 
> Change-Id: I39759f02e61039067ccaabfd52039e4804eddf2f
> ---
>  gcc/config/aarch64/aarch64-protos.h|   2 +
>  gcc/config/aarch64/aarch64.c   | 176 
> -
>  gcc/config/aarch64/atomics.md  | 109 -
>  .../gcc.target/aarch64/atomic-inst-ldadd.c |  58 +++
>  .../gcc.target/aarch64/atomic-inst-ldlogic.c   | 109 +
>  5 files changed, 444 insertions(+), 10 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/atomic-inst-ldadd.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/atomic-inst-ldlogic.c
> 
> diff --git a/gcc/config/aarch64/aarch64-protos.h 
> b/gcc/config/aarch64/aarch64-protos.h
> index eba4c76..76ebd6f 100644
> --- a/gcc/config/aarch64/aarch64-protos.h
> +++ b/gcc/config/aarch64/aarch64-protos.h
> @@ -378,6 +378,8 @@ rtx aarch64_load_tp (rtx);
>  void aarch64_expand_compare_and_swap (rtx op[]);
>  void aarch64_split_compare_and_swap (rtx op[]);
>  void aarch64_gen_atomic_cas (rtx, rtx, rtx, rtx, rtx);
> +
> +bool aarch64_atomic_ldop_supported_p (enum rtx_code);
>  void aarch64_gen_atomic_ldop (enum rtx_code, rtx, rtx, rtx, rtx);
>  void aarch64_split_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx, rtx);
>  
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index dc05c6e..33f9ef3 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -11064,6 +11064,33 @@ aarch64_expand_compare_and_swap (rtx operands[])
>emit_insn (gen_rtx_SET (bval, x));
>  }
>  
> +/* Test whether the target supports using a atomic load-operate instruction.
> +   CODE is the operation and AFTER is TRUE if the data in memory after the
> +   operation should be returned and FALSE if the data before the operation
> +   should be returned.  Returns FALSE if the operation isn't supported by the
> +   architecture.
> +  */

Stray newline, leave the */ on the line before.

> +
> +bool
> +aarch64_atomic_ldop_supported_p (enum rtx_code code)
> +{
> +  if (!TARGET_LSE)
> +return false;
> +
> +  switch (code)
> +{
> +case SET:
> +case AND:
> +case IOR:
> +case XOR:
> +case MINUS:
> +case PLUS:
> +  return true;
> +default:
> +  return false;
> +}
> +}
> +
>  /* Emit a barrier, that is appropriate for memory model MODEL, at the end of 
> a
> sequence implementing an atomic operation.  */
>  
> @@ -11206,26 +11233,169 @@ aarch64_emit_atomic_swap (machine_mode mode, rtx 
> dst, rtx value,
>emit_insn (gen (dst, mem, value, model));
>  }
>  
> -/* Emit an atomic operation where the architecture supports it.  */
> +/* Operations supported by aarch64_emit_atomic_load_op.  */
> +
> +enum aarch64_atomic_load_op_code
> +{
> +  AARCH64_LDOP_PLUS, /* A + B  */
> +  AARCH64_LDOP_XOR,  /* A ^ B  */
> +  AARCH64_LDOP_OR,   /* A | B  */
> +  AARCH64_LDOP_BIC   /* A & ~B  */
> +};

I have a small preference to calling these the same name as the
instructions they will generate, so AARCH64_LDOP_ADD, AARCH64_LDOP_EOR,
AARCH64_LDOP_SET and AARCH64_LDOP_CLR, but I'm happy for you to leave it
this way if you prefer.

> +
> +/* Emit an atomic 

[AArch64][PATCH 4/5] Use atomic load-operate instructions for fetch-update patterns.

2015-09-17 Thread Matthew Wahab

Hello,

ARMv8.1 adds atomic swap and atomic load-operate instructions with
optional memory ordering specifiers. This patch uses the ARMv8.1 atomic
load-operate instructions to implement the atomic_fetch_<op>
patterns. This patch also updates the implementation of the atomic_<op>
patterns, which are treated as versions of the atomic_fetch_<op> which
discard the loaded data.

The general form of the code generated for an atomic_fetch_<op>, with
destination D, source S, memory address A and memory order MO, depends
on whether the operation is directly supported by the instruction. If
<op> is one of PLUS, IOR or XOR, the code generated is:

ld<opc><order><size> S, D, [A]

where
   <opc> is one of {add, set, eor}
   <order> is one of {'', 'a', 'l', 'al'} depending on memory order MO.
   <size> is one of {'', 'b', 'h'} depending on the data size.
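
The <order> suffixes correspond to the C11/C++11 memory orders. As an
illustration (not part of the patch), the functions below differ only in memory
order and, on an ARMv8.1 target with +lse, would be expected to select the
plain, acquire, release and acquire-release forms of LDADD respectively.

/* Illustration only: one atomic add per memory order.  The expected LSE
   instruction is noted in the comments; the exact selection is up to the
   compiler.  */
#include <stdint.h>

uint32_t add_rlx (uint32_t *p, uint32_t v)
{ return __atomic_fetch_add (p, v, __ATOMIC_RELAXED); }   /* ldadd   */

uint32_t add_acq (uint32_t *p, uint32_t v)
{ return __atomic_fetch_add (p, v, __ATOMIC_ACQUIRE); }   /* ldadda  */

uint32_t add_rel (uint32_t *p, uint32_t v)
{ return __atomic_fetch_add (p, v, __ATOMIC_RELEASE); }   /* ldaddl  */

uint32_t add_ar (uint32_t *p, uint32_t v)
{ return __atomic_fetch_add (p, v, __ATOMIC_ACQ_REL); }   /* ldaddal */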

If <op> is SUB, the code generated, with scratch register r, is:

neg r, S
ldadd r, D, [A]

If <op> is AND, the code generated is:
not r, S
ldclr r, D, [A]
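
A quick stand-alone check (illustration only) of the identities these two
rewrites rely on: for the unsigned word arithmetic involved, subtracting S is
the same as adding its two's-complement negation, and ANDing with S is the same
as bit-clearing with the complement of S, since LDCLR computes A & ~B.

/* Illustration only: the operand rewrites used for SUB and AND above.  */
#include <assert.h>
#include <stdint.h>

/* What LDCLR computes: clear in A the bits set in M.  */
static uint32_t
bic (uint32_t a, uint32_t m)
{
  return a & ~m;
}

int
main (void)
{
  uint32_t a = 0x1234, b = 0x00f0;
  /* SUB: negate the source, then add (NEG + LDADD).  */
  assert (a - b == a + (0u - b));
  /* AND: complement the source, then bit-clear (NOT + LDCLR).  */
  assert ((a & b) == bic (a, ~b));
  return 0;
}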

Any operation not in {PLUS, IOR, XOR, SUB, AND} is passed to the
existing aarch64_split_atomic_op function, to implement the operation
using sequences built with the ARMv8 load-exclusive/store-exclusive
instructions.

Tested the series for aarch64-none-linux-gnu with native bootstrap and
make check. Also tested for aarch64-none-elf with cross-compiled
check-gcc on an ARMv8.1 emulator with +lse enabled by default.

Ok for trunk?
Matthew

gcc/
2015-09-17  Matthew Wahab  

* config/aarch64/aarch64-protos.h
(aarch64_atomic_ldop_supported_p): Declare.
* config/aarch64/aarch64.c (aarch64_atomic_ldop_supported_p): New.
(enum aarch64_atomic_load_op_code): New.
(aarch64_emit_atomic_load_op): New.
(aarch64_gen_atomic_load_op): Update to support load-operate
patterns.
* config/aarch64/atomics.md (atomic_<atomic_optab><mode>): Change
to an expander.
(aarch64_atomic_<atomic_optab><mode>): New.
(aarch64_atomic_<atomic_optab><mode>_lse): New.
(atomic_fetch_<atomic_optab><mode>): Change to an expander.
(aarch64_atomic_fetch_<atomic_optab><mode>): New.
(aarch64_atomic_fetch_<atomic_optab><mode>_lse): New.

gcc/testsuite/
2015-09-17  Matthew Wahab  

* gcc.target/aarch64/atomic-inst-ldadd.c: New.
* gcc.target/aarch64/atomic-inst-ldlogic.c: New.

From c4b8eb6d2ca62c57f4a011e06335b918f603ad2a Mon Sep 17 00:00:00 2001
From: Matthew Wahab 
Date: Fri, 7 Aug 2015 17:10:42 +0100
Subject: [PATCH 4/5] Use atomic instructions for fetch-update patterns.

Change-Id: I39759f02e61039067ccaabfd52039e4804eddf2f
---
 gcc/config/aarch64/aarch64-protos.h|   2 +
 gcc/config/aarch64/aarch64.c   | 176 -
 gcc/config/aarch64/atomics.md  | 109 -
 .../gcc.target/aarch64/atomic-inst-ldadd.c |  58 +++
 .../gcc.target/aarch64/atomic-inst-ldlogic.c   | 109 +
 5 files changed, 444 insertions(+), 10 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/atomic-inst-ldadd.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/atomic-inst-ldlogic.c

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index eba4c76..76ebd6f 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -378,6 +378,8 @@ rtx aarch64_load_tp (rtx);
 void aarch64_expand_compare_and_swap (rtx op[]);
 void aarch64_split_compare_and_swap (rtx op[]);
 void aarch64_gen_atomic_cas (rtx, rtx, rtx, rtx, rtx);
+
+bool aarch64_atomic_ldop_supported_p (enum rtx_code);
 void aarch64_gen_atomic_ldop (enum rtx_code, rtx, rtx, rtx, rtx);
 void aarch64_split_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx, rtx);
 
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index dc05c6e..33f9ef3 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -11064,6 +11064,33 @@ aarch64_expand_compare_and_swap (rtx operands[])
   emit_insn (gen_rtx_SET (bval, x));
 }
 
+/* Test whether the target supports using a atomic load-operate instruction.
+   CODE is the operation and AFTER is TRUE if the data in memory after the
+   operation should be returned and FALSE if the data before the operation
+   should be returned.  Returns FALSE if the operation isn't supported by the
+   architecture.
+  */
+
+bool
+aarch64_atomic_ldop_supported_p (enum rtx_code code)
+{
+  if (!TARGET_LSE)
+return false;
+
+  switch (code)
+{
+case SET:
+case AND:
+case IOR:
+case XOR:
+case MINUS:
+case PLUS:
+  return true;
+default:
+  return false;
+}
+}
+
 /* Emit a barrier, that is appropriate for memory model MODEL, at the end of a
sequence implementing an atomic operation.  */
 
@@ -11206,26 +11233,169 @@ aarch64_emit_atomic_swap (machine_mode mode, rtx dst, rtx value,
   emit_insn (gen (dst, mem, value, model));
 }
 
-/* Emit an atomic operation where the architecture supports it.  */
+/* Operations supported by