On Tue, Mar 12, 2024 at 11:11:40PM +1100, Nathaniel Shead wrote:
> On Mon, Mar 11, 2024 at 10:36:06AM -0400, Patrick Palka wrote:
> > On Sun, 10 Mar 2024, Nathaniel Shead wrote:
> > 
> > > Bootstrapped and regtested on x86_64-pc-linux-gnu and
> > > aarch64-unknown-linux-gnu, OK for trunk?
> > > 
> > > It's worth noting that the AArch64 machines I had available to test with
> > > didn't have a new enough glibc to reproduce the ICEs in the PR, but this
> > > patch will be necessary (albeit possibly not sufficient) to fix it.
> > > 
> > > -- >8 --
> > > 
> > > Some targets make use of POLY_INT_CSTs and other custom builtin types,
> > > which currently violate some assumptions when streaming. This patch adds
> > > support for them, specifically AArch64 SVE types like __fp16.
> > 
> > It seems other built-in types are handled by adding them to the
> > fixed_trees vector in init_modules (and then we install them first
> > during streaming).  Could we just add all the target-specific types to
> > fixed_trees too?
> > 
> 
> Yes, that works too. Seems cleaner as well, though I had to add it as a
> separate loop because the set of builtin types registered is not
> determined until runtiem (depending on e.g. ABI flags). I also noticed
> that this fixes another PR, on PowerPC, so I've added a test for it.
> Thanks!
> 
> Bootstrapped and regtested on x86_64-pc-linux-gnu,
> aarch64-unknown-linux-gnu, and powerpc64le-unknown-linux-gnu;
> OK for trunk?
> 
> -- >8 --
> 
> Some targets make use of POLY_INT_CSTs and other custom builtin types,
> which currently violate some assumptions when streaming. This patch adds
> support for them, such as types like Aarch64 __fp16, PowerPC __ibm128,
> and vector types thereof.
> 
> This patch doesn't provide "full" support of AArch64 SVE, however, since
> for that we would need to support 'target' nodes (tracked in PR108080).
> 
> Adding the new builtin types means that on Aarch64 we now have 217
> global trees created on initialisation (up from 191), so this patch also
> slightly bumps the initial size of the fixed_trees allocation to 250.
> 
>       PR c++/98645
>       PR c++/111224
> 
> gcc/cp/ChangeLog:
> 
>       * module.cc (enum tree_tag): Add new tag for builtin types.
>       (trees_out::start): POLY_INT_CSTs can be emitted.
>       (trees_in::start): Likewise.
>       (trees_out::core_vals): Stream POLY_INT_CSTs.
>       (trees_in::core_vals): Likewise.
>       (trees_out::type_node): Handle vectors with multiple coeffs.
>       (trees_in::tree_node): Likewise.
>       (init_modules): Register target-specific builtin types. Bump
>       initial capacity slightly.
> 
> gcc/testsuite/ChangeLog:
> 
>       * g++.dg/modules/target-aarch64-1_a.C: New test.
>       * g++.dg/modules/target-aarch64-1_b.C: New test.
>       * g++.dg/modules/target-powerpc-1_a.C: New test.
>       * g++.dg/modules/target-powerpc-1_b.C: New test.
> 
> Signed-off-by: Nathaniel Shead <nathanielosh...@gmail.com>
> Reviewed-by: Patrick Palka <ppa...@redhat.com>
> ---
>  gcc/cp/module.cc                              | 32 +++++++++++++------
>  .../g++.dg/modules/target-aarch64-1_a.C       | 17 ++++++++++
>  .../g++.dg/modules/target-aarch64-1_b.C       | 13 ++++++++
>  .../g++.dg/modules/target-powerpc-1_a.C       |  7 ++++
>  .../g++.dg/modules/target-powerpc-1_b.C       | 10 ++++++
>  5 files changed, 69 insertions(+), 10 deletions(-)
>  create mode 100644 gcc/testsuite/g++.dg/modules/target-aarch64-1_a.C
>  create mode 100644 gcc/testsuite/g++.dg/modules/target-aarch64-1_b.C
>  create mode 100644 gcc/testsuite/g++.dg/modules/target-powerpc-1_a.C
>  create mode 100644 gcc/testsuite/g++.dg/modules/target-powerpc-1_b.C
> 
> diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc
> index 99055523d91..8aab9ea0bae 100644
> --- a/gcc/cp/module.cc
> +++ b/gcc/cp/module.cc
> @@ -5173,7 +5173,6 @@ trees_out::start (tree t, bool code_streamed)
>        break;
>  
>      case FIXED_CST:
> -    case POLY_INT_CST:
>        gcc_unreachable (); /* Not supported in C++.  */
>        break;
>  
> @@ -5259,7 +5258,6 @@ trees_in::start (unsigned code)
>  
>      case FIXED_CST:
>      case IDENTIFIER_NODE:
> -    case POLY_INT_CST:
>      case SSA_NAME:
>      case TARGET_MEM_REF:
>      case TRANSLATION_UNIT_DECL:
> @@ -6106,7 +6104,10 @@ trees_out::core_vals (tree t)
>        break;
>  
>      case POLY_INT_CST:
> -      gcc_unreachable (); /* Not supported in C++.  */
> +      if (streaming_p ())
> +     for (unsigned ix = 0; ix != NUM_POLY_INT_COEFFS; ix++)
> +       WT (POLY_INT_CST_COEFF (t, ix));
> +      break;
>  
>      case REAL_CST:
>        if (streaming_p ())
> @@ -6615,8 +6616,9 @@ trees_in::core_vals (tree t)
>        break;
>  
>      case POLY_INT_CST:
> -      /* Not suported in C++.  */
> -      return false;
> +      for (unsigned ix = 0; ix != NUM_POLY_INT_COEFFS; ix++)
> +     RT (POLY_INT_CST_COEFF (t, ix));
> +      break;
>  
>      case REAL_CST:
>        if (const void *bytes = buf (sizeof (real_value)))
> @@ -9068,8 +9070,8 @@ trees_out::type_node (tree type)
>        if (streaming_p ())
>       {
>         poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (type);
> -       /* to_constant asserts that only coeff[0] is of interest.  */
> -       wu (static_cast<unsigned HOST_WIDE_INT> (nunits.to_constant ()));
> +       for (unsigned ix = 0; ix != NUM_POLY_INT_COEFFS; ix++)
> +         wu (nunits.coeffs[ix]);
>       }
>        break;
>      }
> @@ -9630,9 +9632,11 @@ trees_in::tree_node (bool is_use)
>  
>         case VECTOR_TYPE:
>           {
> -           unsigned HOST_WIDE_INT nunits = wu ();
> +           poly_uint64 nunits;
> +           for (unsigned ix = 0; ix != NUM_POLY_INT_COEFFS; ix++)
> +             nunits.coeffs[ix] = wu ();
>             if (!get_overrun ())
> -             res = build_vector_type (res, static_cast<poly_int64> (nunits));
> +             res = build_vector_type (res, nunits);
>           }
>           break;
>         }
> @@ -20151,7 +20155,7 @@ init_modules (cpp_reader *reader)
>       some global trees are lazily created and we don't want that to
>       mess with our syndrome of fixed trees.  */
>    unsigned crc = 0;
> -  vec_alloc (fixed_trees, 200);
> +  vec_alloc (fixed_trees, 250);
>  
>    dump () && dump ("+Creating globals");
>    /* Insert the TRANSLATION_UNIT_DECL.  */
> @@ -20169,6 +20173,14 @@ init_modules (cpp_reader *reader)
>         dump () && dump ("+%u", v);
>       }
>      }
> +  /* OS- and machine-specific types are dynamically registered at
> +     runtime, so cannot be part of global_tree_arys.  */
> +  registered_builtin_types && dump ("") && dump ("+\tB:");
> +  for (tree t = registered_builtin_types; t; t = TREE_CHAIN (t))
> +    {
> +      unsigned v = maybe_add_global (TREE_VALUE (t), crc);
> +      dump () && dump ("+%u", v);
> +    }
>    global_crc = crc32_unsigned (crc, fixed_trees->length ());
>    dump ("") && dump ("Created %u unique globals, crc=%x",
>                    fixed_trees->length (), global_crc);
> diff --git a/gcc/testsuite/g++.dg/modules/target-aarch64-1_a.C 
> b/gcc/testsuite/g++.dg/modules/target-aarch64-1_a.C
> new file mode 100644
> index 00000000000..6c699053cdc
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/modules/target-aarch64-1_a.C
> @@ -0,0 +1,17 @@
> +// PR c++/111224
> +// { dg-do compile { target aarch64*-*-* } }
> +// { dg-require-effective-target aarch64_asm_sve_ok }
> +// { dg-additional-options "-fmodules-ts -march=armv8.2-a+sve" }
> +
> +module;
> +
> +// We can't do a header unit of this right now because this
> +// uses target attributes, that we don't yet support.
> +// See also PR c++/108080.
> +#include <arm_sve.h>
> +
> +export module M;
> +
> +export inline void foo(svbool_t x, svfloat16_t f) {
> +  svabs_f16_x(x, f);
> +}
> diff --git a/gcc/testsuite/g++.dg/modules/target-aarch64-1_b.C 
> b/gcc/testsuite/g++.dg/modules/target-aarch64-1_b.C
> new file mode 100644
> index 00000000000..c18691dcf8a
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/modules/target-aarch64-1_b.C
> @@ -0,0 +1,13 @@
> +// PR c++/111224
> +// { dg-module-do link { target aarch64*-*-* } }
> +// { dg-require-effective-target aarch64_asm_sve_ok }
> +// { dg-additional-options "-fmodules-ts -fno-module-lazy 
> -march=armv8.2-a+sve" }
> +
> +#include <arm_sve.h>
> +import M;
> +
> +int main() {
> +  svbool_t x = svptrue_b8 ();
> +  svfloat16_t f = svdup_n_f16(1.0);
> +  foo(x, f);
> +}
> diff --git a/gcc/testsuite/g++.dg/modules/target-powerpc-1_a.C 
> b/gcc/testsuite/g++.dg/modules/target-powerpc-1_a.C
> new file mode 100644
> index 00000000000..693ed101ed5
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/modules/target-powerpc-1_a.C
> @@ -0,0 +1,7 @@
> +// PR c++/98645
> +// { dg-do compile { target powerpc*-*-* } }
> +// { dg-require-effective-target ppc_float128_sw }
> +// { dg-additional-options "-fmodules-ts -mfloat128 -mabi=ieeelongdouble" }
> +
> +export module M;
> +export __ibm128 i = 0.0;
> diff --git a/gcc/testsuite/g++.dg/modules/target-powerpc-1_b.C 
> b/gcc/testsuite/g++.dg/modules/target-powerpc-1_b.C
> new file mode 100644
> index 00000000000..d6b684b556d
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/modules/target-powerpc-1_b.C
> @@ -0,0 +1,10 @@
> +// PR c++/98645
> +// { dg-module-do compile { target powerpc*-*-* } }
> +// { dg-require-effective-target ppc_float128_sw }
> +// { dg-additional-options "-fmodules-ts -mfloat128 -mabi=ieeelongdouble" }
> +
> +import M;
> +
> +int main() {
> +  __ibm128 j = i;
> +}
> -- 
> 2.43.2
> 

Actually just noticed another PR this also seems to fix, PR c++/98688;
here are another two testcases I'll include in the above patch:

diff --git a/gcc/testsuite/g++.dg/modules/target-powerpc-2_a.C 
b/gcc/testsuite/g++.dg/modules/target-powerpc-2_a.C
new file mode 100644
index 00000000000..cc18862e55c
--- /dev/null
+++ b/gcc/testsuite/g++.dg/modules/target-powerpc-2_a.C
@@ -0,0 +1,20 @@
+// PR c++/98688
+// { dg-do compile { target powerpc*-*-* } }
+// { dg-additional-options "-fmodules-ts -mcpu=power10 -mmma" }
+
+export module mma_foo0;
+
+typedef unsigned char  vec_t __attribute__((vector_size(16)));
+
+export void
+foo0 (__vector_quad *dst, vec_t *vec, __vector_pair *pvecp)
+{
+  __vector_quad acc;
+  __vector_pair vecp0 = *pvecp;
+  vec_t vec1 = vec[1];
+
+  __builtin_mma_xvf64ger (&acc, vecp0, vec1);
+  __builtin_mma_xvf64gerpp (&acc, vecp0, vec1);
+  __builtin_mma_xvf64gerpn (&acc, vecp0, vec1);
+  dst[0] = acc;
+}
diff --git a/gcc/testsuite/g++.dg/modules/target-powerpc-2_b.C 
b/gcc/testsuite/g++.dg/modules/target-powerpc-2_b.C
new file mode 100644
index 00000000000..9e77ba7afca
--- /dev/null
+++ b/gcc/testsuite/g++.dg/modules/target-powerpc-2_b.C
@@ -0,0 +1,12 @@
+// PR c++/98688
+// { dg-module-do compile { target powerpc*-*-* } }
+// { dg-additional-options "-fmodules-ts -mcpu=power10 -mmma" }
+
+import mma_foo0;
+
+typedef unsigned char  vec_t __attribute__((vector_size(16)));
+
+void bar(__vector_quad *dst, vec_t *vec, __vector_pair *pvecp)
+{
+    foo0 (dst, vec, pvecp);
+}
-- 
2.43.2

Reply via email to