This patch is largely a copy/paste from the aarch64 SVE counterpart,
and adds support for tuples to the MVE intrinsics framework.
Introduce function_resolver::infer_tuple_type which will be used to
resolve overloaded vst2q and vst4q function names in a later patch.
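Update MAX_TUPLE_SIZE to 4: MVE only uses 2- and 4-tuples, so the 3rd
entry of the table (3-tuples) is simply left unused; the table is small,
so this is not a big waste. As a result, fix the accesses to
acle_vector_types in a few places: they are now indexed by
num_vectors - 1 instead of num_vectors >> 1.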
The new wrap_type_in_struct, register_type_decl and infer_tuple_type
are largely copies of the aarch64 versions, and
register_builtin_tuple_types is very similar.
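The patch adds arm_array_mode, an implementation of the
TARGET_ARRAY_MODE hook, which is used when build_array_type_nelts
creates the arrays of 2 or 4 MVE vectors, so that they get a vector mode.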
gcc/ChangeLog:
* config/arm/arm-mve-builtins.cc (wrap_type_in_struct): New.
(register_type_decl): New.
(register_builtin_tuple_types): Fix support for tuples.
(function_resolver::infer_tuple_type): New.
* config/arm/arm-mve-builtins.h (MAX_TUPLE_SIZE): Set to 4.
(function_resolver::infer_tuple_type): Declare.
* config/arm/arm.cc (arm_array_mode): New.
(TARGET_ARRAY_MODE): New.
---
gcc/config/arm/arm-mve-builtins.cc | 76 ++++++++++++++++++++++++++----
gcc/config/arm/arm-mve-builtins.h | 3 +-
gcc/config/arm/arm.cc | 24 ++++++++++
3 files changed, 92 insertions(+), 11 deletions(-)
diff --git a/gcc/config/arm/arm-mve-builtins.cc b/gcc/config/arm/arm-mve-builtins.cc
index 3982d20058b..255aed25600 100644
--- a/gcc/config/arm/arm-mve-builtins.cc
+++ b/gcc/config/arm/arm-mve-builtins.cc
@@ -39,6 +39,7 @@
#include "gimple-iterator.h"
#include "explow.h"
#include "emit-rtl.h"
+#include "stor-layout.h"
#include "langhooks.h"
#include "stringpool.h"
#include "attribs.h"
@@ -463,6 +464,48 @@ register_vector_type (vector_type_index type)
acle_vector_types[0][type] = vectype;
}
+/* Return a structure type that contains a single field of type FIELD_TYPE.
+ The field is called __val, but that's an internal detail rather than
+ an exposed part of the API. */
+static tree
+wrap_type_in_struct (tree field_type)
+{
+ tree field = build_decl (input_location, FIELD_DECL,
+ get_identifier ("__val"), field_type);
+ tree struct_type = lang_hooks.types.make_type (RECORD_TYPE);
+ DECL_FIELD_CONTEXT (field) = struct_type;
+ TYPE_FIELDS (struct_type) = field;
+ layout_type (struct_type);
+ return struct_type;
+}
+
+/* Register a built-in TYPE_DECL called NAME for TYPE. This is used/needed
+ when TYPE is a structure type. */
+static void
+register_type_decl (tree type, const char *name)
+{
+ tree decl = build_decl (input_location, TYPE_DECL,
+ get_identifier (name), type);
+ TYPE_NAME (type) = decl;
+ TYPE_STUB_DECL (type) = decl;
+ lang_hooks.decls.pushdecl (decl);
+ /* ??? Undo the effect of set_underlying_type for C. The C frontend
+ doesn't recognize DECL as a built-in because (as intended) the decl has
+ a real location instead of BUILTINS_LOCATION. The frontend therefore
+ treats the decl like a normal C "typedef struct foo foo;", expecting
+ the type for tag "struct foo" to have a dummy unnamed TYPE_DECL instead
+ of the named one we attached above. It then sets DECL_ORIGINAL_TYPE
+ on the supposedly unnamed decl, creating a circularity that upsets
+ dwarf2out.
+
+ We don't want to follow the normal C model and create "struct foo"
+ tags for tuple types since (a) the types are supposed to be opaque
+ and (b) they couldn't be defined as a real struct anyway. Treating
+ the TYPE_DECLs as "typedef struct foo foo;" without creating
+ "struct foo" would lead to confusing error messages. */
+ DECL_ORIGINAL_TYPE (decl) = NULL_TREE;
+}
+
/* Register tuple types of element type TYPE under their arm_mve_types.h
names. */
static void
@@ -479,7 +522,7 @@ register_builtin_tuple_types (vector_type_index type)
|| (info->requires_float && !TARGET_HAVE_MVE_FLOAT))
{
for (unsigned int num_vectors = 2; num_vectors <= 4; num_vectors += 2)
- acle_vector_types[num_vectors >> 1][type] = void_type_node;
+ acle_vector_types[num_vectors - 1][type] = void_type_node;
return;
}
@@ -493,15 +536,18 @@ register_builtin_tuple_types (vector_type_index type)
tree vectype = acle_vector_types[0][type];
tree arrtype = build_array_type_nelts (vectype, num_vectors);
- gcc_assert (TYPE_MODE_RAW (arrtype) == TYPE_MODE (arrtype));
- tree field = build_decl (input_location, FIELD_DECL,
- get_identifier ("val"), arrtype);
-
- tree t = lang_hooks.types.simulate_record_decl (input_location, buffer,
- make_array_slice (&field,
- 1));
- gcc_assert (TYPE_MODE_RAW (t) == TYPE_MODE (t));
- acle_vector_types[num_vectors >> 1][type] = TREE_TYPE (t);
+ gcc_assert (VECTOR_MODE_P (TYPE_MODE (arrtype))
+ && TYPE_MODE_RAW (arrtype) == TYPE_MODE (arrtype)
+ && TYPE_ALIGN (arrtype) == 64);
+
+ tree tuple_type = wrap_type_in_struct (arrtype);
+ gcc_assert (VECTOR_MODE_P (TYPE_MODE (tuple_type))
+ && TYPE_MODE_RAW (tuple_type) == TYPE_MODE (tuple_type)
+ && TYPE_ALIGN (tuple_type) == 64);
+
+ register_type_decl (tuple_type, buffer);
+
+ acle_vector_types[num_vectors - 1][type] = tuple_type;
}
}
@@ -1328,6 +1374,16 @@ function_resolver::infer_vector_type (unsigned int argno)
return infer_vector_or_tuple_type (argno, 1);
}
+/* If the function operates on tuples of vectors, require argument ARGNO to be
+ a tuple with the appropriate number of vectors, otherwise require it to be a
+ single vector. Return the associated type suffix on success. Report an
+ error and return NUM_TYPE_SUFFIXES on failure. */
+type_suffix_index
+function_resolver::infer_tuple_type (unsigned int argno)
+{
+ return infer_vector_or_tuple_type (argno, vectors_per_tuple ());
+}
+
/* Require argument ARGNO to be a vector or scalar argument. Return true
if it is, otherwise report an appropriate error. */
bool
diff --git a/gcc/config/arm/arm-mve-builtins.h b/gcc/config/arm/arm-mve-builtins.h
index c6a929c3eee..cdc07b4e51f 100644
--- a/gcc/config/arm/arm-mve-builtins.h
+++ b/gcc/config/arm/arm-mve-builtins.h
@@ -81,7 +81,7 @@
constant. */
namespace arm_mve {
/* The maximum number of vectors in an ACLE tuple type. */
-const unsigned int MAX_TUPLE_SIZE = 3;
+const unsigned int MAX_TUPLE_SIZE = 4;
/* Used to represent the default merge argument index for _m functions.
The actual index depends on how many arguments the function takes. */
@@ -387,6 +387,7 @@ public:
type_suffix_index infer_pointer_type (unsigned int);
type_suffix_index infer_vector_or_tuple_type (unsigned int, unsigned int);
type_suffix_index infer_vector_type (unsigned int);
+ type_suffix_index infer_tuple_type (unsigned int);
bool require_vector_or_scalar_type (unsigned int);
diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
index 6f11b6c816d..24949c6a6b5 100644
--- a/gcc/config/arm/arm.cc
+++ b/gcc/config/arm/arm.cc
@@ -279,6 +279,7 @@ static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
+static opt_machine_mode arm_array_mode (machine_mode, unsigned HOST_WIDE_INT);
static bool arm_array_mode_supported_p (machine_mode,
unsigned HOST_WIDE_INT);
static machine_mode arm_preferred_simd_mode (scalar_mode);
@@ -516,6 +517,8 @@ static const scoped_attribute_specs *const arm_attribute_table[] =
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
+#undef TARGET_ARRAY_MODE
+#define TARGET_ARRAY_MODE arm_array_mode
#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
@@ -29786,6 +29789,27 @@ arm_vector_mode_supported_p (machine_mode mode)
return false;
}
+/* Implements target hook array_mode. */
+static opt_machine_mode
+arm_array_mode (machine_mode mode, unsigned HOST_WIDE_INT nelems)
+{
+ if (TARGET_HAVE_MVE
+ /* MVE accepts only tuples of 2 or 4 vectors. */
+ && (nelems == 2
+ || nelems == 4))
+ {
+ machine_mode struct_mode;
+ FOR_EACH_MODE_IN_CLASS (struct_mode, GET_MODE_CLASS (mode))
+ {
+ if (GET_MODE_INNER (struct_mode) == GET_MODE_INNER (mode)
+ && known_eq (GET_MODE_NUNITS (struct_mode),
+ GET_MODE_NUNITS (mode) * nelems))
+ return struct_mode;
+ }
+ }
+ return opt_machine_mode ();
+}
+
/* Implements target hook array_mode_supported_p. */
static bool
--
2.34.1