In the test below, we cannot cache either [x] or [y] neither before the load of flag1 nor the load of flag2. This is because the corresponding store/release can flush a different value of x or y:

+  if (__atomic_load_n (&flag1, __ATOMIC_ACQUIRE))
+    i = x + y;
+
+  if (__atomic_load_n (&flag2, __ATOMIC_ACQUIRE))
+    a = 10;
+  j = x + y;

For example, on x86-64, we are hoisting "x" and "y" before the load of flag2:

        movl    flag1(%rip), %eax
        movl    x(%rip), %edx           <-- hoist of X
        testl   %eax, %eax
        movl    y(%rip), %eax           <-- hoist of Y
        je      .L2
        leal    (%edx,%eax), %ecx
        movl    %ecx, i(%rip)
.L2:
        movl    flag2(%rip), %ecx
        testl   %ecx, %ecx
        je      .L3
        movl    $10, a(%rip)
.L3:
        addl    %edx, %eax              <-- x/y may have changed by the
                                            acquire of flag2.
        movl    %eax, j(%rip)
        ret

(For that matter, we are also hoisting "x" before the actual test of flag1 as well, but I believe this is allowed since flag1 has already been loaded.)

The pass at fault here is the combine stack adjustment RTL pass. I have not looked into why this is happening, but I wanted to get this test into the branch lest we forget about it.

Is this OK for the branch?  Is my understanding correct?

Aldy
Index: atomic-hoist-1.c
===================================================================
--- atomic-hoist-1.c    (revision 0)
+++ atomic-hoist-1.c    (revision 0)
@@ -0,0 +1,96 @@
+/* { dg-do link } */
+/* { dg-require-effective-target sync_int_long } */
+/* { dg-final { simulate-thread } } */
+
+/* Test that a hoist is not performed across an acquire barrier.  */
+
+#include <stdio.h>
+#include "simulate-thread.h"
+
+int flag1=1, flag2=1;
+
+unsigned int x=1, y=2, i=0x1234, j=0x5678, a;
+
+/* These two tables are random numbers such that there are no two
+   pairs between the both tables that yield the same sum.  */
+
+unsigned int table1[16] = {
+  24747, 19512, 3692, 25985,
+  25079, 24, 3310, 22073,
+  4026, 25641, 35240, 35542,
+  24783, 17378, 12184, 23755
+};
+
+unsigned int table2[16] = {
+  2467, 37461, 14064, 36460,
+  46434, 8387, 42174, 36763,
+  49205, 48759, 10526, 3446,
+  14035, 2195, 6798, 38782
+};
+
+int table_cycle_size = 16;
+
+/* At each instruction, get a new X and Y from the tables to later
+   verify that we have not reused a value incorrectly.  */
+void simulate_thread_other_threads ()
+{
+  static int current = 0;
+
+  if (++current >= table_cycle_size)
+    current = 0;
+  x = table1[current];
+  y = table2[current];
+}
+
+/* Return true if error, otherwise 0.  */
+int verify_result ()
+{
+  /* [i] should not equal [j], because that would mean that we hoisted
+     [x] and [y] instead of loading them again.  */
+  int fail = i == j;
+  if (fail)
+    printf("FAIL: i (%u) should not equal j (%u)\n", i, j);
+  return fail;
+}
+
+int simulate_thread_step_verify ()
+{
+  return verify_result ();
+}
+
+int simulate_thread_final_verify ()
+{
+  return verify_result ();
+}
+
+__attribute__((noinline))
+void simulate_thread_main()
+{
+  /* The values of x or y should not be hoisted across reads of
+     flag[12].
+
+     For example, when the second load below synchronizes with another
+     thread, the synchronization is with a release, and that release
+     may cause a stored value of x/y to be flushed and become visible.
+     So, for this case, it is incorrect for CSE/CSA/and-others to
+     hoist x or y above the load of flag2.  */
+
+  /* Execute loads with value changing at various cyclic values.  */
+  if (__atomic_load_n (&flag1, __ATOMIC_ACQUIRE))
+    i = x + y;
+ 
+  if (__atomic_load_n (&flag2, __ATOMIC_ACQUIRE))
+    a = 10;
+  j = x + y;
+
+  /* Since x and y have been changing at each instruction above, i and j
+     should be different.  If they are the same, we have hoisted
+     something incorrectly.  */
+}
+
+main()
+{
+  simulate_thread_main ();
+  simulate_thread_done ();
+  return 0;
+}

Reply via email to