This patch folds acc_on_device as a regular builtin, but postpones the folding
until we know which compiler (host or accelerator) we're in. As suggested by
Bernd, it uses the existing builtin folding machinery.
Trunk is still using the older PTX runtime scheme (Thomas is working on that),
so the only change there is in the host-side libgomp piece.
Ok for trunk?
nathan
2015-09-29 Nathan Sidwell <nat...@codesourcery.com>
gcc/
* builtins.c (expand_builtin_acc_on_device): Delete.
(expand_builtin): Don't call it.
(fold_builtin_1): Fold acc_on_device.
libgomp/
* oacc-init.c (acc_on_device): Force optimization level.
Index: libgomp/oacc-init.c
===================================================================
--- libgomp/oacc-init.c (revision 228250)
+++ libgomp/oacc-init.c (working copy)
@@ -620,10 +620,12 @@ acc_set_device_num (int ord, acc_device_
ialias (acc_set_device_num)
-int
+/* Compile on_device with optimization, so that the compiler expands
+ this, rather than generating infinitely recursive code. */
+
+int __attribute__ ((__optimize__ ("O2")))
acc_on_device (acc_device_t dev)
{
- /* Just rely on the compiler builtin. */
return __builtin_acc_on_device (dev);
}
Index: gcc/builtins.c
===================================================================
--- gcc/builtins.c (revision 228250)
+++ gcc/builtins.c (working copy)
@@ -5859,46 +5859,6 @@ expand_stack_save (void)
}
-/* Expand OpenACC acc_on_device.
-
- This has to happen late (that is, not in early folding; expand_builtin_*,
- rather than fold_builtin_*), as we have to act differently for host and
- acceleration device (ACCEL_COMPILER conditional). */
-
-static rtx
-expand_builtin_acc_on_device (tree exp, rtx target)
-{
- if (!validate_arglist (exp, INTEGER_TYPE, VOID_TYPE))
- return NULL_RTX;
-
- tree arg = CALL_EXPR_ARG (exp, 0);
-
- /* Return (arg == v1 || arg == v2) ? 1 : 0. */
- machine_mode v_mode = TYPE_MODE (TREE_TYPE (arg));
- rtx v = expand_normal (arg), v1, v2;
-#ifdef ACCEL_COMPILER
- v1 = GEN_INT (GOMP_DEVICE_NOT_HOST);
- v2 = GEN_INT (ACCEL_COMPILER_acc_device);
-#else
- v1 = GEN_INT (GOMP_DEVICE_NONE);
- v2 = GEN_INT (GOMP_DEVICE_HOST);
-#endif
- machine_mode target_mode = TYPE_MODE (integer_type_node);
- if (!target || !register_operand (target, target_mode))
- target = gen_reg_rtx (target_mode);
- emit_move_insn (target, const1_rtx);
- rtx_code_label *done_label = gen_label_rtx ();
- do_compare_rtx_and_jump (v, v1, EQ, false, v_mode, NULL_RTX,
- NULL, done_label, PROB_EVEN);
- do_compare_rtx_and_jump (v, v2, EQ, false, v_mode, NULL_RTX,
- NULL, done_label, PROB_EVEN);
- emit_move_insn (target, const0_rtx);
- emit_label (done_label);
-
- return target;
-}
-
-
/* Expand an expression EXP that calls a built-in function,
with result going to TARGET if that's convenient
(and in mode MODE if that's convenient).
@@ -7036,9 +6996,8 @@ expand_builtin (tree exp, rtx target, rt
break;
case BUILT_IN_ACC_ON_DEVICE:
- target = expand_builtin_acc_on_device (exp, target);
- if (target)
- return target;
+ /* Do library call, if we failed to expand the builtin when
+ folding. */
break;
default: /* just do library call, if unknown builtin */
@@ -10271,6 +10230,27 @@ fold_builtin_1 (location_t loc, tree fnd
return build_empty_stmt (loc);
break;
+ case BUILT_IN_ACC_ON_DEVICE:
+ /* Don't fold on_device until we know which compiler is active. */
+ if (symtab->state == EXPANSION)
+ {
+ unsigned val_host = GOMP_DEVICE_HOST;
+ unsigned val_dev = GOMP_DEVICE_NONE;
+
+#ifdef ACCEL_COMPILER
+ val_host = GOMP_DEVICE_NOT_HOST;
+ val_dev = ACCEL_COMPILER_acc_device;
+#endif
+ tree host = build2 (EQ_EXPR, boolean_type_node, arg0,
+ build_int_cst (integer_type_node, val_host));
+ tree dev = build2 (EQ_EXPR, boolean_type_node, arg0,
+ build_int_cst (integer_type_node, val_dev));
+
+ tree result = build2 (TRUTH_OR_EXPR, boolean_type_node, host, dev);
+ return fold_convert (integer_type_node, result);
+ }
+ break;
+
default:
break;
}