This is one of the patches required to make offloading via the LTO path
work when the machines involved differ.
x86 requires bigger alignments for some types than nvptx does, which
becomes an issue when reading LTO produced by the host compiler. The
problem with having a variable with DECL_ALIGN larger than the stack
alignment is that gcc will try to align the variable dynamically with an
alloca/rounding operation, and there isn't a working alloca on nvptx.
Besides, the overhead would be pointless.
The patch below restricts the alignments to the maximum possible when
reading in LTO data in an offload compiler. Unfortunately
BIGGEST_ALIGNMENT isn't suitable for this, as it can vary at runtime
with __attribute__((target)), and because vector modes can exceed it, so a
limit based on BIGGEST_ALIGNMENT would be unsuitable for some ports.
Instead I've added a hook called absolute_biggest_alignment, which is
consulted when reading LTO on an offload compiler. It has no effect anywhere
except on ptx, where it limits alignments to 64 bits.
Bootstrapped and tested on x86_64-linux. Ok?
Bernd
* tree-streamer-in.c (unpack_ts_decl_common_value_fields,
unpack_ts_type_common_value_fields): If ACCEL_COMPILER,
restrict alignments to absolute_biggest_alignment.
* config/i386/i386.c (TARGET_ABSOLUTE_BIGGEST_ALIGNMENT):
Define.
* config/i386/i386.h (BIGGEST_ALIGNMENT): Update comment.
* doc/tm.texi.in (TARGET_ABSOLUTE_BIGGEST_ALIGNMENT): Add.
* doc/tm.texi: Regenerate.
* target.def (absolute_biggest_alignment): New DEFHOOKPOD.
Index: gcc/tree-streamer-in.c
===================================================================
--- gcc/tree-streamer-in.c.orig
+++ gcc/tree-streamer-in.c
@@ -217,7 +217,10 @@ unpack_ts_decl_common_value_fields (stru
DECL_EXTERNAL (expr) = (unsigned) bp_unpack_value (bp, 1);
DECL_GIMPLE_REG_P (expr) = (unsigned) bp_unpack_value (bp, 1);
DECL_ALIGN (expr) = (unsigned) bp_unpack_var_len_unsigned (bp);
-
+#ifdef ACCEL_COMPILER
+ if (DECL_ALIGN (expr) > targetm.absolute_biggest_alignment)
+ DECL_ALIGN (expr) = targetm.absolute_biggest_alignment;
+#endif
if (TREE_CODE (expr) == LABEL_DECL)
{
EH_LANDING_PAD_NR (expr) = (int) bp_unpack_var_len_unsigned (bp);
@@ -359,6 +362,10 @@ unpack_ts_type_common_value_fields (stru
TYPE_READONLY (expr) = (unsigned) bp_unpack_value (bp, 1);
TYPE_PRECISION (expr) = bp_unpack_var_len_unsigned (bp);
TYPE_ALIGN (expr) = bp_unpack_var_len_unsigned (bp);
+#ifdef ACCEL_COMPILER
+ if (TYPE_ALIGN (expr) > targetm.absolute_biggest_alignment)
+ TYPE_ALIGN (expr) = targetm.absolute_biggest_alignment;
+#endif
TYPE_ALIAS_SET (expr) = bp_unpack_var_len_int (bp);
}
Index: gcc/config/i386/i386.c
===================================================================
--- gcc/config/i386/i386.c.orig
+++ gcc/config/i386/i386.c
@@ -47623,6 +47623,9 @@ ix86_atomic_assign_expand_fenv (tree *ho
#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
+#undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
+#define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512
+
struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-i386.h"
Index: gcc/config/i386/i386.h
===================================================================
--- gcc/config/i386/i386.h.orig
+++ gcc/config/i386/i386.h
@@ -784,7 +784,10 @@ extern const char *host_detect_local_cpu
rounder than this.
Pentium+ prefers DFmode values to be aligned to 64 bit boundary
- and Pentium Pro XFmode values at 128 bit boundaries. */
+ and Pentium Pro XFmode values at 128 bit boundaries.
+
+ When increasing the maximum, also update
+ TARGET_ABSOLUTE_BIGGEST_ALIGNMENT. */
#define BIGGEST_ALIGNMENT \
(TARGET_AVX512F ? 512 : (TARGET_AVX ? 256 : 128))
Index: gcc/doc/tm.texi
===================================================================
--- gcc/doc/tm.texi.orig
+++ gcc/doc/tm.texi
@@ -1003,6 +1003,12 @@ bits. Note that this is not the biggest
just the biggest alignment that, when violated, may cause a fault.
@end defmac
+@deftypevr {Target Hook} HOST_WIDE_INT TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
+If defined, this target hook specifies the absolute biggest alignment
+that a type or variable can have on this machine, otherwise,
+@code{BIGGEST_ALIGNMENT} is used.
+@end deftypevr
+
@defmac MALLOC_ABI_ALIGNMENT
Alignment, in bits, a C conformant malloc implementation has to
provide. If not defined, the default value is @code{BITS_PER_WORD}.
Index: gcc/doc/tm.texi.in
===================================================================
--- gcc/doc/tm.texi.in.orig
+++ gcc/doc/tm.texi.in
@@ -957,6 +957,8 @@ bits. Note that this is not the biggest
just the biggest alignment that, when violated, may cause a fault.
@end defmac
+@hook TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
+
@defmac MALLOC_ABI_ALIGNMENT
Alignment, in bits, a C conformant malloc implementation has to
provide. If not defined, the default value is @code{BITS_PER_WORD}.
Index: gcc/target.def
===================================================================
--- gcc/target.def.orig
+++ gcc/target.def
@@ -1760,6 +1760,13 @@ HOOK_VECTOR_END (vectorize)
#undef HOOK_PREFIX
#define HOOK_PREFIX "TARGET_"
+DEFHOOKPOD
+(absolute_biggest_alignment,
+ "If defined, this target hook specifies the absolute biggest alignment\n\
+that a type or variable can have on this machine, otherwise,\n\
+@code{BIGGEST_ALIGNMENT} is used.",
+ HOST_WIDE_INT, BIGGEST_ALIGNMENT)
+
/* Allow target specific overriding of option settings after options have
been changed by an attribute or pragma or when it is reset at the
end of the code affected by an attribute or pragma. */