Re: [AArch64][PATCH 5/5] Use atomic load-operate instructions for update-fetch patterns.

Matthew Wahab Mon, 21 Sep 2015 04:41:02 -0700

On 17/09/15 17:54, Matthew Wahab wrote:

ARMv8.1 adds atomic swap and atomic load-operate instructions with
optional memory ordering specifiers. This patch uses the ARMv8.1
load-operate instructions to implement the atomic_<op>_fetch patterns.


The approach is to use the atomic load-operate instruction to atomically
load the data and update memory and then to use the loaded data to
calculate the value that the instruction would have stored. The
calculation attempts to mirror the operation of the atomic instruction.
For example, atomic_and_fetch<mode> is implemented with an atomic
load-bic so the result is also calculated using a BIC instruction.

[...]


2015-09-17  Matthew Wahab  <matthew.wa...@arm.com>

     * config/aarch64/aarch64-protos.h (aarch64_gen_atomic_ldop):
     Adjust declaration.
     * config/aarch64/aarch64.c (aarch64_emit_bic): New.
     (aarch64_gen_atomic_load_op): Adjust comment.  Add parameter
     out_result.  Update to support update-fetch operations.
     * config/aarch64/atomics.md (aarch64_atomic_exchange<mode>_lse):
     Adjust for change to aarch64_gen_atomic_ldop.
     (aarch64_atomic_<atomic_optab><mode>_lse): Likewise.
     (aarch64_atomic_fetch_<atomic_optab><mode>_lse): Likewise.
     (atomic_<atomic_optab>_fetch<mode>): Change to an expander.
     (aarch64_atomic_<atomic_optab>_fetch<mode>): New.
     (aarch64_atomic_<atomic_optab>_fetch<mode>_lse): New.

gcc/testsuite
2015-09-17  Matthew Wahab  <matthew.wa...@arm.com>

     * gcc.target/aarch64/atomic-inst-ldadd.c: Add tests for
     update-fetch operations.
     * gcc.target/aarch64/atomic-inst-ldlogic.c: Likewise.

Attached is an updated patch that takes into account the review comments and changes for the rest of the series.


The changes in this patch:
- Updated emit_bic for changes in the earlier patch.
- Simplified the patterns used in the new expanders.
- Dropped CC clobber from the _lse patterns.

Tested the series for aarch64-none-linux-gnu with native bootstrap and
make check. Also tested for aarch64-none-elf with cross-compiled
check-gcc on an ARMv8.1 emulator with +lse enabled by default.

Ok for trunk?
Matthew

2015-09-21  Matthew Wahab  <matthew.wa...@arm.com>

        * config/aarch64/aarch64-protos.h (aarch64_gen_atomic_ldop):
        Adjust declaration.
        * config/aarch64/aarch64.c (aarch64_emit_bic): New.
        (aarch64_gen_atomic_ldop): Adjust comment.  Add parameter
        out_result.  Update to support update-fetch operations.
        * config/aarch64/atomics.md (aarch64_atomic_exchange<mode>_lse):
        Adjust for change to aarch64_gen_atomic_ldop.
        (aarch64_atomic_<atomic_optab><mode>_lse): Likewise.
        (aarch64_atomic_fetch_<atomic_optab><mode>_lse): Likewise.
        (atomic_<atomic_optab>_fetch<mode>): Change to an expander.
        (aarch64_atomic_<atomic_optab>_fetch<mode>): New.
        (aarch64_atomic_<atomic_optab>_fetch<mode>_lse): New.

gcc/testsuite
2015-09-21  Matthew Wahab  <matthew.wa...@arm.com>

        * gcc.target/aarch64/atomic-inst-ldadd.c: Add tests for
        update-fetch operations.
        * gcc.target/aarch64/atomic-inst-ldlogic.c: Likewise.

From abd313723964e90b6e7d7785b646c657f6b072f9 Mon Sep 17 00:00:00 2001
From: Matthew Wahab <matthew.wa...@arm.com>
Date: Mon, 17 Aug 2015 11:27:18 +0100
Subject: [PATCH 5/5] Use atomic instructions for update-fetch patterns.

Change-Id: I5eef48586fe904f0d2df8c581fb3c12a4a2d9c78
---
 gcc/config/aarch64/aarch64-protos.h                |   2 +-
 gcc/config/aarch64/aarch64.c                       |  72 +++++++++++--
 gcc/config/aarch64/atomics.md                      |  55 +++++++++-
 .../gcc.target/aarch64/atomic-inst-ldadd.c         |  53 ++++++---
 .../gcc.target/aarch64/atomic-inst-ldlogic.c       | 118 ++++++++++++++-------
 5 files changed, 241 insertions(+), 59 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 76ebd6f..dd8ebcc 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -380,7 +380,7 @@ void aarch64_split_compare_and_swap (rtx op[]);
 void aarch64_gen_atomic_cas (rtx, rtx, rtx, rtx, rtx);
 
 bool aarch64_atomic_ldop_supported_p (enum rtx_code);
-void aarch64_gen_atomic_ldop (enum rtx_code, rtx, rtx, rtx, rtx);
+void aarch64_gen_atomic_ldop (enum rtx_code, rtx, rtx, rtx, rtx, rtx);
 void aarch64_split_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx, rtx);
 
 bool aarch64_gen_adjusted_ldpstp (rtx *, bool, enum machine_mode, RTX_CODE);
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 3a1b434..b6cdf7c 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -11211,6 +11211,25 @@ aarch64_split_compare_and_swap (rtx operands[])
     aarch64_emit_post_barrier (model);
 }
 
+/* Emit a BIC instruction.  */
+
+static void
+aarch64_emit_bic (machine_mode mode, rtx dst, rtx s1, rtx s2, int shift)
+{
+  rtx shift_rtx = GEN_INT (shift);
+  rtx (*gen) (rtx, rtx, rtx, rtx);
+
+  switch (mode)
+    {
+    case SImode: gen = gen_and_one_cmpl_lshrsi3; break;
+    case DImode: gen = gen_and_one_cmpl_lshrdi3; break;
+    default:
+      gcc_unreachable ();
+    }
+
+  emit_insn (gen (dst, s2, shift_rtx, s1));
+}
+
 /* Emit an atomic swap.  */
 
 static void
@@ -11305,13 +11324,14 @@ aarch64_emit_atomic_load_op (enum aarch64_atomic_load_op_code code,
 }
 
 /* Emit an atomic load+operate.  CODE is the operation.  OUT_DATA is the
-   location to store the data read from memory.  MEM is the memory location to
-   read and modify.  MODEL_RTX is the memory ordering to use.  VALUE is the
-   second operand for the operation.  Either OUT_DATA or OUT_RESULT, but not
-   both, can be NULL.  */
+   location to store the data read from memory.  OUT_RESULT is the location to
+   store the result of the operation.  MEM is the memory location to read and
+   modify.  MODEL_RTX is the memory ordering to use.  VALUE is the second
+   operand for the operation.  Either OUT_DATA or OUT_RESULT, but not both, can
+   be NULL.  */
 
 void
-aarch64_gen_atomic_ldop (enum rtx_code code, rtx out_data,
+aarch64_gen_atomic_ldop (enum rtx_code code, rtx out_data, rtx out_result,
 			 rtx mem, rtx value, rtx model_rtx)
 {
   machine_mode mode = GET_MODE (mem);
@@ -11324,12 +11344,15 @@ aarch64_gen_atomic_ldop (enum rtx_code code, rtx out_data,
   if (out_data)
     out_data = gen_lowpart (mode, out_data);
 
+  if (out_result)
+    out_result = gen_lowpart (mode, out_result);
+
   /* Make sure the value is in a register, putting it into a destination
      register if it needs to be manipulated.  */
   if (!register_operand (value, mode)
       || code == AND || code == MINUS)
     {
-      src = out_data;
+      src = out_result ? out_result : out_data;
       emit_move_insn (src, gen_lowpart (mode, value));
     }
   else
@@ -11395,6 +11418,43 @@ aarch64_gen_atomic_ldop (enum rtx_code code, rtx out_data,
     }
 
   aarch64_emit_atomic_load_op (ldop_code, mode, out_data, src, mem, model_rtx);
+
+  /* If necessary, calculate the data in memory after the update by redoing the
+     operation from values in registers.  */
+  if (!out_result)
+    return;
+
+  if (short_mode)
+    {
+      src = gen_lowpart (wmode, src);
+      out_data = gen_lowpart (wmode, out_data);
+      out_result = gen_lowpart (wmode, out_result);
+    }
+
+  x = NULL_RTX;
+
+  switch (code)
+    {
+    case MINUS:
+    case PLUS:
+      x = gen_rtx_PLUS (wmode, out_data, src);
+      break;
+    case IOR:
+      x = gen_rtx_IOR (wmode, out_data, src);
+      break;
+    case XOR:
+      x = gen_rtx_XOR (wmode, out_data, src);
+      break;
+    case AND:
+      aarch64_emit_bic (wmode, out_result, out_data, src, 0);
+      return;
+    default:
+      gcc_unreachable ();
+    }
+
+  emit_set_insn (out_result, x);
+
+  return;
 }
 
 /* Split an atomic operation.  */
diff --git a/gcc/config/aarch64/atomics.md b/gcc/config/aarch64/atomics.md
index e0d8856..e7ac5f6 100644
--- a/gcc/config/aarch64/atomics.md
+++ b/gcc/config/aarch64/atomics.md
@@ -219,7 +219,7 @@
   "&& reload_completed"
   [(const_int 0)]
   {
-    aarch64_gen_atomic_ldop (SET, operands[0], operands[1],
+    aarch64_gen_atomic_ldop (SET, operands[0], NULL, operands[1],
 			     operands[2], operands[3]);
     DONE;
   }
@@ -280,7 +280,7 @@
   "&& reload_completed"
   [(const_int 0)]
   {
-    aarch64_gen_atomic_ldop (<CODE>, operands[3], operands[0],
+    aarch64_gen_atomic_ldop (<CODE>, operands[3], NULL, operands[0],
 			     operands[1], operands[2]);
     DONE;
   }
@@ -368,7 +368,7 @@
   "&& reload_completed"
   [(const_int 0)]
   {
-    aarch64_gen_atomic_ldop (<CODE>, operands[0], operands[1],
+    aarch64_gen_atomic_ldop (<CODE>, operands[0], NULL, operands[1],
 			     operands[2], operands[3]);
     DONE;
   }
@@ -398,7 +398,31 @@
   }
 )
 
-(define_insn_and_split "atomic_<atomic_optab>_fetch<mode>"
+;; Load-operate-store, returning the updated memory data.
+
+(define_expand "atomic_<atomic_optab>_fetch<mode>"
+ [(match_operand:ALLI 0 "register_operand" "")
+  (atomic_op:ALLI
+   (match_operand:ALLI 1 "aarch64_sync_memory_operand" "")
+   (match_operand:ALLI 2 "<atomic_op_operand>" ""))
+  (match_operand:SI 3 "const_int_operand")]
+ ""
+{
+  rtx (*gen) (rtx, rtx, rtx, rtx);
+  rtx value = operands[2];
+
+  /* Use an atomic load-operate instruction when possible.  */
+  if (aarch64_atomic_ldop_supported_p (<CODE>))
+    gen = gen_aarch64_atomic_<atomic_optab>_fetch<mode>_lse;
+  else
+    gen = gen_aarch64_atomic_<atomic_optab>_fetch<mode>;
+
+  emit_insn (gen (operands[0], operands[1], value, operands[3]));
+
+  DONE;
+})
+
+(define_insn_and_split "aarch64_atomic_<atomic_optab>_fetch<mode>"
   [(set (match_operand:ALLI 0 "register_operand" "=&r")
     (atomic_op:ALLI
       (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q")
@@ -421,6 +445,29 @@
   }
 )
 
+(define_insn_and_split "aarch64_atomic_<atomic_optab>_fetch<mode>_lse"
+  [(set (match_operand:ALLI 0 "register_operand" "=&r")
+    (atomic_op:ALLI
+     (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q")
+     (match_operand:ALLI 2 "<atomic_op_operand>" "r<const_atomic>")))
+   (set (match_dup 1)
+    (unspec_volatile:ALLI
+      [(match_dup 1)
+       (match_dup 2)
+       (match_operand:SI 3 "const_int_operand")]
+      UNSPECV_ATOMIC_LDOP))
+     (clobber (match_scratch:ALLI 4 "=r"))]
+  "TARGET_LSE"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  {
+    aarch64_gen_atomic_ldop (<CODE>, operands[4], operands[0], operands[1],
+			     operands[2], operands[3]);
+    DONE;
+  }
+)
+
 (define_insn_and_split "atomic_nand_fetch<mode>"
   [(set (match_operand:ALLI 0 "register_operand" "=&r")
     (not:ALLI
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-inst-ldadd.c b/gcc/testsuite/gcc.target/aarch64/atomic-inst-ldadd.c
index c21d2ed..4b2282c 100644
--- a/gcc/testsuite/gcc.target/aarch64/atomic-inst-ldadd.c
+++ b/gcc/testsuite/gcc.target/aarch64/atomic-inst-ldadd.c
@@ -31,6 +31,29 @@
     __atomic_fetch_sub (val, foo, MODEL);				\
   }
 
+#define ADD_LOAD(FN, TY, MODEL)						\
+  TY FNNAME (FN, TY) (TY* val, TY* foo)					\
+  {									\
+    return __atomic_add_fetch (val, foo, MODEL);			\
+  }
+
+#define ADD_LOAD_NORETURN(FN, TY, MODEL)				\
+  void FNNAME (FN, TY) (TY* val, TY* foo)				\
+  {									\
+    __atomic_add_fetch (val, foo, MODEL);				\
+  }
+
+#define SUB_LOAD(FN, TY, MODEL)						\
+  TY FNNAME (FN, TY) (TY* val, TY* foo)					\
+  {									\
+    return __atomic_sub_fetch (val, foo, MODEL);			\
+  }
+
+#define SUB_LOAD_NORETURN(FN, TY, MODEL)				\
+  void FNNAME (FN, TY) (TY* val, TY* foo)				\
+  {									\
+    __atomic_sub_fetch (val, foo, MODEL);				\
+  }
 
 TEST (load_add, LOAD_ADD)
 TEST (load_add_notreturn, LOAD_ADD_NORETURN)
@@ -38,20 +61,26 @@ TEST (load_add_notreturn, LOAD_ADD_NORETURN)
 TEST (load_sub, LOAD_SUB)
 TEST (load_sub_notreturn, LOAD_SUB_NORETURN)
 
-/* { dg-final { scan-assembler-times "ldaddb\t" 8} } */
-/* { dg-final { scan-assembler-times "ldaddab\t" 16} } */
-/* { dg-final { scan-assembler-times "ldaddlb\t" 8} } */
-/* { dg-final { scan-assembler-times "ldaddalb\t" 16} } */
+TEST (add_load, ADD_LOAD)
+TEST (add_load_notreturn, ADD_LOAD_NORETURN)
+
+TEST (sub_load, SUB_LOAD)
+TEST (sub_load_notreturn, SUB_LOAD_NORETURN)
+
+/* { dg-final { scan-assembler-times "ldaddb\t" 16} } */
+/* { dg-final { scan-assembler-times "ldaddab\t" 32} } */
+/* { dg-final { scan-assembler-times "ldaddlb\t" 16} } */
+/* { dg-final { scan-assembler-times "ldaddalb\t" 32} } */
 
-/* { dg-final { scan-assembler-times "ldaddh\t" 8} } */
-/* { dg-final { scan-assembler-times "ldaddah\t" 16} } */
-/* { dg-final { scan-assembler-times "ldaddlh\t" 8} } */
-/* { dg-final { scan-assembler-times "ldaddalh\t" 16} } */
+/* { dg-final { scan-assembler-times "ldaddh\t" 16} } */
+/* { dg-final { scan-assembler-times "ldaddah\t" 32} } */
+/* { dg-final { scan-assembler-times "ldaddlh\t" 16} } */
+/* { dg-final { scan-assembler-times "ldaddalh\t" 32} } */
 
-/* { dg-final { scan-assembler-times "ldadd\t" 16} } */
-/* { dg-final { scan-assembler-times "ldadda\t" 32} } */
-/* { dg-final { scan-assembler-times "ldaddl\t" 16} } */
-/* { dg-final { scan-assembler-times "ldaddal\t" 32} } */
+/* { dg-final { scan-assembler-times "ldadd\t" 32} } */
+/* { dg-final { scan-assembler-times "ldadda\t" 64} } */
+/* { dg-final { scan-assembler-times "ldaddl\t" 32} } */
+/* { dg-final { scan-assembler-times "ldaddal\t" 64} } */
 
 /* { dg-final { scan-assembler-not "ldaxr\t" } } */
 /* { dg-final { scan-assembler-not "stlxr\t" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-inst-ldlogic.c b/gcc/testsuite/gcc.target/aarch64/atomic-inst-ldlogic.c
index fd0f484..4879d52 100644
--- a/gcc/testsuite/gcc.target/aarch64/atomic-inst-ldlogic.c
+++ b/gcc/testsuite/gcc.target/aarch64/atomic-inst-ldlogic.c
@@ -43,6 +43,42 @@
     __atomic_fetch_xor (val, foo, MODEL);				\
   }
 
+#define OR_LOAD(FN, TY, MODEL)						\
+  TY FNNAME (FN, TY) (TY* val, TY* foo)					\
+  {									\
+    return __atomic_or_fetch (val, foo, MODEL);				\
+  }
+
+#define OR_LOAD_NORETURN(FN, TY, MODEL)					\
+  void FNNAME (FN, TY) (TY* val, TY* foo)				\
+  {									\
+    __atomic_or_fetch (val, foo, MODEL);				\
+  }
+
+#define AND_LOAD(FN, TY, MODEL)						\
+  TY FNNAME (FN, TY) (TY* val, TY* foo)					\
+  {									\
+    return __atomic_and_fetch (val, foo, MODEL);			\
+  }
+
+#define AND_LOAD_NORETURN(FN, TY, MODEL)				\
+  void FNNAME (FN, TY) (TY* val, TY* foo)				\
+  {									\
+    __atomic_and_fetch (val, foo, MODEL);				\
+  }
+
+#define XOR_LOAD(FN, TY, MODEL)						\
+  TY FNNAME (FN, TY) (TY* val, TY* foo)					\
+  {									\
+    return __atomic_xor_fetch (val, foo, MODEL);			\
+  }
+
+#define XOR_LOAD_NORETURN(FN, TY, MODEL)				\
+  void FNNAME (FN, TY) (TY* val, TY* foo)				\
+  {									\
+    __atomic_xor_fetch (val, foo, MODEL);				\
+  }
+
 
 TEST (load_or, LOAD_OR)
 TEST (load_or_notreturn, LOAD_OR_NORETURN)
@@ -53,56 +89,66 @@ TEST (load_and_notreturn, LOAD_AND_NORETURN)
 TEST (load_xor, LOAD_XOR)
 TEST (load_xor_notreturn, LOAD_XOR_NORETURN)
 
+TEST (or_load, OR_LOAD)
+TEST (or_load_notreturn, OR_LOAD_NORETURN)
+
+TEST (and_load, AND_LOAD)
+TEST (and_load_notreturn, AND_LOAD_NORETURN)
+
+TEST (xor_load, XOR_LOAD)
+TEST (xor_load_notreturn, XOR_LOAD_NORETURN)
+
+
 /* Load-OR.  */
 
-/* { dg-final { scan-assembler-times "ldsetb\t" 4} } */
-/* { dg-final { scan-assembler-times "ldsetab\t" 8} } */
-/* { dg-final { scan-assembler-times "ldsetlb\t" 4} } */
-/* { dg-final { scan-assembler-times "ldsetalb\t" 8} } */
+/* { dg-final { scan-assembler-times "ldsetb\t" 8} } */
+/* { dg-final { scan-assembler-times "ldsetab\t" 16} } */
+/* { dg-final { scan-assembler-times "ldsetlb\t" 8} } */
+/* { dg-final { scan-assembler-times "ldsetalb\t" 16} } */
 
-/* { dg-final { scan-assembler-times "ldseth\t" 4} } */
-/* { dg-final { scan-assembler-times "ldsetah\t" 8} } */
-/* { dg-final { scan-assembler-times "ldsetlh\t" 4} } */
-/* { dg-final { scan-assembler-times "ldsetalh\t" 8} } */
+/* { dg-final { scan-assembler-times "ldseth\t" 8} } */
+/* { dg-final { scan-assembler-times "ldsetah\t" 16} } */
+/* { dg-final { scan-assembler-times "ldsetlh\t" 8} } */
+/* { dg-final { scan-assembler-times "ldsetalh\t" 16} } */
 
-/* { dg-final { scan-assembler-times "ldset\t" 8} } */
-/* { dg-final { scan-assembler-times "ldseta\t" 16} } */
-/* { dg-final { scan-assembler-times "ldsetl\t" 8} } */
-/* { dg-final { scan-assembler-times "ldsetal\t" 16} } */
+/* { dg-final { scan-assembler-times "ldset\t" 16} } */
+/* { dg-final { scan-assembler-times "ldseta\t" 32} } */
+/* { dg-final { scan-assembler-times "ldsetl\t" 16} } */
+/* { dg-final { scan-assembler-times "ldsetal\t" 32} } */
 
 /* Load-AND.  */
 
-/* { dg-final { scan-assembler-times "ldclrb\t" 4} } */
-/* { dg-final { scan-assembler-times "ldclrab\t" 8} } */
-/* { dg-final { scan-assembler-times "ldclrlb\t" 4} } */
-/* { dg-final { scan-assembler-times "ldclralb\t" 8} } */
+/* { dg-final { scan-assembler-times "ldclrb\t" 8} } */
+/* { dg-final { scan-assembler-times "ldclrab\t" 16} } */
+/* { dg-final { scan-assembler-times "ldclrlb\t" 8} } */
+/* { dg-final { scan-assembler-times "ldclralb\t" 16} } */
 
-/* { dg-final { scan-assembler-times "ldclrh\t" 4} } */
-/* { dg-final { scan-assembler-times "ldclrah\t" 8} } */
-/* { dg-final { scan-assembler-times "ldclrlh\t" 4} } */
-/* { dg-final { scan-assembler-times "ldclralh\t" 8} } */
+/* { dg-final { scan-assembler-times "ldclrh\t" 8} } */
+/* { dg-final { scan-assembler-times "ldclrah\t" 16} } */
+/* { dg-final { scan-assembler-times "ldclrlh\t" 8} } */
+/* { dg-final { scan-assembler-times "ldclralh\t" 16} } */
 
-/* { dg-final { scan-assembler-times "ldclr\t" 8} */
-/* { dg-final { scan-assembler-times "ldclra\t" 16} } */
-/* { dg-final { scan-assembler-times "ldclrl\t" 8} } */
-/* { dg-final { scan-assembler-times "ldclral\t" 16} } */
+/* { dg-final { scan-assembler-times "ldclr\t" 16} } */
+/* { dg-final { scan-assembler-times "ldclra\t" 32} } */
+/* { dg-final { scan-assembler-times "ldclrl\t" 16} } */
+/* { dg-final { scan-assembler-times "ldclral\t" 32} } */
 
 /* Load-XOR.  */
 
-/* { dg-final { scan-assembler-times "ldeorb\t" 4} } */
-/* { dg-final { scan-assembler-times "ldeorab\t" 8} } */
-/* { dg-final { scan-assembler-times "ldeorlb\t" 4} } */
-/* { dg-final { scan-assembler-times "ldeoralb\t" 8} } */
+/* { dg-final { scan-assembler-times "ldeorb\t" 8} } */
+/* { dg-final { scan-assembler-times "ldeorab\t" 16} } */
+/* { dg-final { scan-assembler-times "ldeorlb\t" 8} } */
+/* { dg-final { scan-assembler-times "ldeoralb\t" 16} } */
 
-/* { dg-final { scan-assembler-times "ldeorh\t" 4} } */
-/* { dg-final { scan-assembler-times "ldeorah\t" 8} } */
-/* { dg-final { scan-assembler-times "ldeorlh\t" 4} } */
-/* { dg-final { scan-assembler-times "ldeoralh\t" 8} } */
+/* { dg-final { scan-assembler-times "ldeorh\t" 8} } */
+/* { dg-final { scan-assembler-times "ldeorah\t" 16} } */
+/* { dg-final { scan-assembler-times "ldeorlh\t" 8} } */
+/* { dg-final { scan-assembler-times "ldeoralh\t" 16} } */
 
-/* { dg-final { scan-assembler-times "ldeor\t" 8} */
-/* { dg-final { scan-assembler-times "ldeora\t" 16} } */
-/* { dg-final { scan-assembler-times "ldeorl\t" 8} } */
-/* { dg-final { scan-assembler-times "ldeoral\t" 16} } */
+/* { dg-final { scan-assembler-times "ldeor\t" 16} } */
+/* { dg-final { scan-assembler-times "ldeora\t" 32} } */
+/* { dg-final { scan-assembler-times "ldeorl\t" 16} } */
+/* { dg-final { scan-assembler-times "ldeoral\t" 32} } */
 
 /* { dg-final { scan-assembler-not "ldaxr\t" } } */
 /* { dg-final { scan-assembler-not "stlxr\t" } } */
-- 
2.1.4

Re: [AArch64][PATCH 5/5] Use atomic load-operate instructions for update-fetch patterns.

Reply via email to