On Tue, Mar 12, 2024 at 11:11:40PM +1100, Nathaniel Shead wrote: > On Mon, Mar 11, 2024 at 10:36:06AM -0400, Patrick Palka wrote: > > On Sun, 10 Mar 2024, Nathaniel Shead wrote: > > > > > Bootstrapped and regtested on x86_64-pc-linux-gnu and > > > aarch64-unknown-linux-gnu, OK for trunk? > > > > > > It's worth noting that the AArch64 machines I had available to test with > > > didn't have a new enough glibc to reproduce the ICEs in the PR, but this > > > patch will be necessary (albeit possibly not sufficient) to fix it. > > > > > > -- >8 -- > > > > > > Some targets make use of POLY_INT_CSTs and other custom builtin types, > > > which currently violate some assumptions when streaming. This patch adds > > > support for them, specifically AArch64 SVE types like __fp16. > > > > It seems other built-in types are handled by adding them to the > > fixed_trees vector in init_modules (and then we install them first > > during streaming). Could we just add all the target-specific types to > > fixed_trees too? > > > > Yes, that works too. Seems cleaner as well, though I had to add it as a > separate loop because the set of builtin types registered is not > determined until runtiem (depending on e.g. ABI flags). I also noticed > that this fixes another PR, on PowerPC, so I've added a test for it. > Thanks! > > Bootstrapped and regtested on x86_64-pc-linux-gnu, > aarch64-unknown-linux-gnu, and powerpc64le-unknown-linux-gnu; > OK for trunk? > > -- >8 -- > > Some targets make use of POLY_INT_CSTs and other custom builtin types, > which currently violate some assumptions when streaming. This patch adds > support for them, such as types like Aarch64 __fp16, PowerPC __ibm128, > and vector types thereof. > > This patch doesn't provide "full" support of AArch64 SVE, however, since > for that we would need to support 'target' nodes (tracked in PR108080). > > Adding the new builtin types means that on Aarch64 we now have 217 > global trees created on initialisation (up from 191), so this patch also > slightly bumps the initial size of the fixed_trees allocation to 250. > > PR c++/98645 > PR c++/111224 > > gcc/cp/ChangeLog: > > * module.cc (enum tree_tag): Add new tag for builtin types. > (trees_out::start): POLY_INT_CSTs can be emitted. > (trees_in::start): Likewise. > (trees_out::core_vals): Stream POLY_INT_CSTs. > (trees_in::core_vals): Likewise. > (trees_out::type_node): Handle vectors with multiple coeffs. > (trees_in::tree_node): Likewise. > (init_modules): Register target-specific builtin types. Bump > initial capacity slightly. > > gcc/testsuite/ChangeLog: > > * g++.dg/modules/target-aarch64-1_a.C: New test. > * g++.dg/modules/target-aarch64-1_b.C: New test. > * g++.dg/modules/target-powerpc-1_a.C: New test. > * g++.dg/modules/target-powerpc-1_b.C: New test. > > Signed-off-by: Nathaniel Shead <nathanielosh...@gmail.com> > Reviewed-by: Patrick Palka <ppa...@redhat.com> > --- > gcc/cp/module.cc | 32 +++++++++++++------ > .../g++.dg/modules/target-aarch64-1_a.C | 17 ++++++++++ > .../g++.dg/modules/target-aarch64-1_b.C | 13 ++++++++ > .../g++.dg/modules/target-powerpc-1_a.C | 7 ++++ > .../g++.dg/modules/target-powerpc-1_b.C | 10 ++++++ > 5 files changed, 69 insertions(+), 10 deletions(-) > create mode 100644 gcc/testsuite/g++.dg/modules/target-aarch64-1_a.C > create mode 100644 gcc/testsuite/g++.dg/modules/target-aarch64-1_b.C > create mode 100644 gcc/testsuite/g++.dg/modules/target-powerpc-1_a.C > create mode 100644 gcc/testsuite/g++.dg/modules/target-powerpc-1_b.C > > diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc > index 99055523d91..8aab9ea0bae 100644 > --- a/gcc/cp/module.cc > +++ b/gcc/cp/module.cc > @@ -5173,7 +5173,6 @@ trees_out::start (tree t, bool code_streamed) > break; > > case FIXED_CST: > - case POLY_INT_CST: > gcc_unreachable (); /* Not supported in C++. */ > break; > > @@ -5259,7 +5258,6 @@ trees_in::start (unsigned code) > > case FIXED_CST: > case IDENTIFIER_NODE: > - case POLY_INT_CST: > case SSA_NAME: > case TARGET_MEM_REF: > case TRANSLATION_UNIT_DECL: > @@ -6106,7 +6104,10 @@ trees_out::core_vals (tree t) > break; > > case POLY_INT_CST: > - gcc_unreachable (); /* Not supported in C++. */ > + if (streaming_p ()) > + for (unsigned ix = 0; ix != NUM_POLY_INT_COEFFS; ix++) > + WT (POLY_INT_CST_COEFF (t, ix)); > + break; > > case REAL_CST: > if (streaming_p ()) > @@ -6615,8 +6616,9 @@ trees_in::core_vals (tree t) > break; > > case POLY_INT_CST: > - /* Not suported in C++. */ > - return false; > + for (unsigned ix = 0; ix != NUM_POLY_INT_COEFFS; ix++) > + RT (POLY_INT_CST_COEFF (t, ix)); > + break; > > case REAL_CST: > if (const void *bytes = buf (sizeof (real_value))) > @@ -9068,8 +9070,8 @@ trees_out::type_node (tree type) > if (streaming_p ()) > { > poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (type); > - /* to_constant asserts that only coeff[0] is of interest. */ > - wu (static_cast<unsigned HOST_WIDE_INT> (nunits.to_constant ())); > + for (unsigned ix = 0; ix != NUM_POLY_INT_COEFFS; ix++) > + wu (nunits.coeffs[ix]); > } > break; > } > @@ -9630,9 +9632,11 @@ trees_in::tree_node (bool is_use) > > case VECTOR_TYPE: > { > - unsigned HOST_WIDE_INT nunits = wu (); > + poly_uint64 nunits; > + for (unsigned ix = 0; ix != NUM_POLY_INT_COEFFS; ix++) > + nunits.coeffs[ix] = wu (); > if (!get_overrun ()) > - res = build_vector_type (res, static_cast<poly_int64> (nunits)); > + res = build_vector_type (res, nunits); > } > break; > } > @@ -20151,7 +20155,7 @@ init_modules (cpp_reader *reader) > some global trees are lazily created and we don't want that to > mess with our syndrome of fixed trees. */ > unsigned crc = 0; > - vec_alloc (fixed_trees, 200); > + vec_alloc (fixed_trees, 250); > > dump () && dump ("+Creating globals"); > /* Insert the TRANSLATION_UNIT_DECL. */ > @@ -20169,6 +20173,14 @@ init_modules (cpp_reader *reader) > dump () && dump ("+%u", v); > } > } > + /* OS- and machine-specific types are dynamically registered at > + runtime, so cannot be part of global_tree_arys. */ > + registered_builtin_types && dump ("") && dump ("+\tB:"); > + for (tree t = registered_builtin_types; t; t = TREE_CHAIN (t)) > + { > + unsigned v = maybe_add_global (TREE_VALUE (t), crc); > + dump () && dump ("+%u", v); > + } > global_crc = crc32_unsigned (crc, fixed_trees->length ()); > dump ("") && dump ("Created %u unique globals, crc=%x", > fixed_trees->length (), global_crc); > diff --git a/gcc/testsuite/g++.dg/modules/target-aarch64-1_a.C > b/gcc/testsuite/g++.dg/modules/target-aarch64-1_a.C > new file mode 100644 > index 00000000000..6c699053cdc > --- /dev/null > +++ b/gcc/testsuite/g++.dg/modules/target-aarch64-1_a.C > @@ -0,0 +1,17 @@ > +// PR c++/111224 > +// { dg-do compile { target aarch64*-*-* } } > +// { dg-require-effective-target aarch64_asm_sve_ok } > +// { dg-additional-options "-fmodules-ts -march=armv8.2-a+sve" } > + > +module; > + > +// We can't do a header unit of this right now because this > +// uses target attributes, that we don't yet support. > +// See also PR c++/108080. > +#include <arm_sve.h> > + > +export module M; > + > +export inline void foo(svbool_t x, svfloat16_t f) { > + svabs_f16_x(x, f); > +} > diff --git a/gcc/testsuite/g++.dg/modules/target-aarch64-1_b.C > b/gcc/testsuite/g++.dg/modules/target-aarch64-1_b.C > new file mode 100644 > index 00000000000..c18691dcf8a > --- /dev/null > +++ b/gcc/testsuite/g++.dg/modules/target-aarch64-1_b.C > @@ -0,0 +1,13 @@ > +// PR c++/111224 > +// { dg-module-do link { target aarch64*-*-* } } > +// { dg-require-effective-target aarch64_asm_sve_ok } > +// { dg-additional-options "-fmodules-ts -fno-module-lazy > -march=armv8.2-a+sve" } > + > +#include <arm_sve.h> > +import M; > + > +int main() { > + svbool_t x = svptrue_b8 (); > + svfloat16_t f = svdup_n_f16(1.0); > + foo(x, f); > +} > diff --git a/gcc/testsuite/g++.dg/modules/target-powerpc-1_a.C > b/gcc/testsuite/g++.dg/modules/target-powerpc-1_a.C > new file mode 100644 > index 00000000000..693ed101ed5 > --- /dev/null > +++ b/gcc/testsuite/g++.dg/modules/target-powerpc-1_a.C > @@ -0,0 +1,7 @@ > +// PR c++/98645 > +// { dg-do compile { target powerpc*-*-* } } > +// { dg-require-effective-target ppc_float128_sw } > +// { dg-additional-options "-fmodules-ts -mfloat128 -mabi=ieeelongdouble" } > + > +export module M; > +export __ibm128 i = 0.0; > diff --git a/gcc/testsuite/g++.dg/modules/target-powerpc-1_b.C > b/gcc/testsuite/g++.dg/modules/target-powerpc-1_b.C > new file mode 100644 > index 00000000000..d6b684b556d > --- /dev/null > +++ b/gcc/testsuite/g++.dg/modules/target-powerpc-1_b.C > @@ -0,0 +1,10 @@ > +// PR c++/98645 > +// { dg-module-do compile { target powerpc*-*-* } } > +// { dg-require-effective-target ppc_float128_sw } > +// { dg-additional-options "-fmodules-ts -mfloat128 -mabi=ieeelongdouble" } > + > +import M; > + > +int main() { > + __ibm128 j = i; > +} > -- > 2.43.2 >
Actually just noticed another PR this also seems to fix, PR c++/98688; here are another two testcases I'll include in the above patch: diff --git a/gcc/testsuite/g++.dg/modules/target-powerpc-2_a.C b/gcc/testsuite/g++.dg/modules/target-powerpc-2_a.C new file mode 100644 index 00000000000..cc18862e55c --- /dev/null +++ b/gcc/testsuite/g++.dg/modules/target-powerpc-2_a.C @@ -0,0 +1,20 @@ +// PR c++/98688 +// { dg-do compile { target powerpc*-*-* } } +// { dg-additional-options "-fmodules-ts -mcpu=power10 -mmma" } + +export module mma_foo0; + +typedef unsigned char vec_t __attribute__((vector_size(16))); + +export void +foo0 (__vector_quad *dst, vec_t *vec, __vector_pair *pvecp) +{ + __vector_quad acc; + __vector_pair vecp0 = *pvecp; + vec_t vec1 = vec[1]; + + __builtin_mma_xvf64ger (&acc, vecp0, vec1); + __builtin_mma_xvf64gerpp (&acc, vecp0, vec1); + __builtin_mma_xvf64gerpn (&acc, vecp0, vec1); + dst[0] = acc; +} diff --git a/gcc/testsuite/g++.dg/modules/target-powerpc-2_b.C b/gcc/testsuite/g++.dg/modules/target-powerpc-2_b.C new file mode 100644 index 00000000000..9e77ba7afca --- /dev/null +++ b/gcc/testsuite/g++.dg/modules/target-powerpc-2_b.C @@ -0,0 +1,12 @@ +// PR c++/98688 +// { dg-module-do compile { target powerpc*-*-* } } +// { dg-additional-options "-fmodules-ts -mcpu=power10 -mmma" } + +import mma_foo0; + +typedef unsigned char vec_t __attribute__((vector_size(16))); + +void bar(__vector_quad *dst, vec_t *vec, __vector_pair *pvecp) +{ + foo0 (dst, vec, pvecp); +} -- 2.43.2