This adds an OpenACC-specific thread synchronization builtin and the
nvptx pattern it expands to. Committed to gomp-4_0-branch.
Bernd
Index: gcc/ChangeLog.gomp
===================================================================
--- gcc/ChangeLog.gomp (revision 223869)
+++ gcc/ChangeLog.gomp (working copy)
@@ -1,5 +1,13 @@
2015-05-29 Bernd Schmidt <ber...@codesourcery.com>
+ * config/nvptx/nvptx.md (UNSPECV_BARSYNC): New constant.
+ (oacc_threadbarrier): New expander.
+ (threadbarrier_insn): New pattern.
+ * config/nvptx/nvptx.c (nvptx_cannot_copy_insn_p): Handle threadbarrier_insn.
+ * omp-builtins.def (BUILT_IN_GOACC_THREADBARRIER): Define.
+ * builtins.c (expand_oacc_threadbarrier): Expand it.
+ (expand_builtin): Handle it.
+
* gimple.def (GIMPLE_OMP_ENTRY_END): New code.
* gimple.h (gimple_build_omp_entry_end): Declare.
(CASE_GIMPLE_OMP): Add GIMPLE_OMP_ENTRY_END.
Index: gcc/builtins.c
===================================================================
--- gcc/builtins.c (revision 223867)
+++ gcc/builtins.c (working copy)
@@ -5946,6 +5946,19 @@ expand_builtin_acc_on_device (tree exp,
#endif
}
+/* Emit the target's OpenACC thread barrier instruction, if the target
+   provides an oacc_threadbarrier pattern.  Targets without one, or
+   whose expander yields no insn, get no code for the barrier.  */
+static void
+expand_oacc_threadbarrier (void)
+{
+#ifdef HAVE_oacc_threadbarrier
+  rtx barrier = GEN_FCN (CODE_FOR_oacc_threadbarrier) ();
+  if (barrier)
+    emit_insn (barrier);
+#endif
+}
+
/* Expand a thread-id/thread-count builtin for OpenACC. */
static rtx
@@ -7217,6 +7230,10 @@ expand_builtin (tree exp, rtx target, rt
case BUILT_IN_GOACC_THREAD_BROADCAST_LL:
return expand_builtin_oacc_thread_broadcast (exp, target);
+ case BUILT_IN_GOACC_THREADBARRIER:
+ expand_oacc_threadbarrier ();
+ return const0_rtx;
+
default: /* just do library call, if unknown builtin */
break;
}
Index: gcc/config/nvptx/nvptx.c
===================================================================
--- gcc/config/nvptx/nvptx.c (revision 223867)
+++ gcc/config/nvptx/nvptx.c (working copy)
@@ -2123,14 +2123,20 @@ nvptx_vector_alignment (const_tree type)
return MIN (align, BIGGEST_ALIGNMENT);
}
+/* Indicate that INSN cannot be duplicated. This is true for insns
+ that generate a unique id. To be on the safe side, we also
+ exclude instructions that have to be executed simultaneously by
+ all threads in a warp. Returns true when INSN is recognized as the
+ oacc_thread_broadcastsi or the threadbarrier_insn pattern, and
+ false for every other insn. */
+
static bool
nvptx_cannot_copy_insn_p (rtx_insn *insn)
{
if (recog_memoized (insn) == CODE_FOR_oacc_thread_broadcastsi)
return true;
+ if (recog_memoized (insn) == CODE_FOR_threadbarrier_insn)
+ return true;
return false;
}
-
/* Record a symbol for mkoffload to enter into the mapping table. */
@@ -2255,7 +2261,7 @@ nvptx_file_end (void)
#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT nvptx_vector_alignment
-#undef TARGET_CANNOT_COPY_INSN_P
+#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P nvptx_cannot_copy_insn_p
struct gcc_target targetm = TARGET_INITIALIZER;
Index: gcc/config/nvptx/nvptx.md
===================================================================
--- gcc/config/nvptx/nvptx.md (revision 223867)
+++ gcc/config/nvptx/nvptx.md (working copy)
@@ -62,6 +62,7 @@ (define_c_enum "unspecv" [
UNSPECV_CAS
UNSPECV_XCHG
UNSPECV_WARP_BCAST
+ UNSPECV_BARSYNC
])
(define_attr "subregs_ok" "false,true"
@@ -1457,3 +1458,16 @@ (define_insn "atomic_fetch_<logic><mode>
(match_dup 1))]
"<MODE>mode == SImode || TARGET_SM35"
"%.\\tatom%A1.b%T0.<logic>\\t%0, %1, %2;")
+
+;; Output bar.sync with constant operand 0 as its argument.  ??? Mark as not predicable later?
+(define_insn "threadbarrier_insn"
+ [(unspec_volatile [(match_operand:SI 0 "const_int_operand" "")] UNSPECV_BARSYNC)]
+ ""
+ "bar.sync\\t%0;")
+
+;; Expand the OpenACC thread barrier.  The barrier argument is not an
+;; input of this expander: it is fixed to zero in the preparation code
+;; and referenced with match_dup, so the generator function takes no
+;; arguments.  That matches its caller in builtins.c, which invokes
+;; GEN_FCN (CODE_FOR_oacc_threadbarrier) () without operands.  The
+;; resulting unspec_volatile is the form matched by threadbarrier_insn.
+(define_expand "oacc_threadbarrier"
+  [(unspec_volatile [(match_dup 0)] UNSPECV_BARSYNC)]
+  ""
+{
+  operands[0] = const0_rtx;
+})
Index: gcc/omp-builtins.def
===================================================================
--- gcc/omp-builtins.def (revision 223867)
+++ gcc/omp-builtins.def (working copy)
@@ -81,6 +81,8 @@ DEF_GOACC_BUILTIN (BUILT_IN_GOACC_THREAD
BT_FN_UINT_UINT, ATTR_NOTHROW_LEAF_LIST)
DEF_GOACC_BUILTIN (BUILT_IN_GOACC_THREAD_BROADCAST_LL, "GOACC_thread_broadcast_ll",
BT_FN_ULONGLONG_ULONGLONG, ATTR_NOTHROW_LEAF_LIST)
+DEF_GOACC_BUILTIN (BUILT_IN_GOACC_THREADBARRIER, "GOACC_threadbarrier",
+ BT_FN_VOID, ATTR_NOTHROW_LEAF_LIST)
DEF_GOACC_BUILTIN_COMPILER (BUILT_IN_ACC_ON_DEVICE, "acc_on_device",
BT_FN_INT_INT, ATTR_CONST_NOTHROW_LEAF_LIST)