This is patch 2 to support the AArch64 SIMD ABI [1] in GCC.

It defines the TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN,
TARGET_SIMD_CLONE_ADJUST, and TARGET_SIMD_CLONE_USABLE macros
so that GCC can generate SIMD clones on aarch64.

Steve Ellcey
sell...@cavium.com


2018-11-08  Steve Ellcey  <sell...@cavium.com>

        * config/aarch64/aarch64.c: Include cgraph.h.
        (aarch64_simd_clone_compute_vecsize_and_simdlen): New function.
        (aarch64_simd_clone_adjust): Ditto.
        (aarch64_simd_clone_usable): Ditto.
        (TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN): New macro.
        (TARGET_SIMD_CLONE_ADJUST): Ditto.
        (TARGET_SIMD_CLONE_USABLE): Ditto.

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index c82c7b6..cccf961 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -40,6 +40,7 @@
 #include "regs.h"
 #include "emit-rtl.h"
 #include "recog.h"
+#include "cgraph.h"
 #include "diagnostic.h"
 #include "insn-attr.h"
 #include "alias.h"
@@ -17834,6 +17835,131 @@ aarch64_speculation_safe_value (machine_mode mode,
   return result;
 }
 
+/* Implement TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN.  Validate NODE's
+   return and argument types, then set CLONEI->vecsize_mangle, mask_mode,
+   vecsize_int, vecsize_float and (if it is 0) CLONEI->simdlen from BASE_TYPE.
+   Return the number of vecsize_mangle variants to emit; 0 means none.  */
+
+static int
+aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
+					struct cgraph_simd_clone *clonei,
+					tree base_type,
+					int num ATTRIBUTE_UNUSED)
+{
+  int ret = 0;
+
+  if (clonei->simdlen
+      && (clonei->simdlen < 2
+	  || clonei->simdlen > 1024
+	  || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
+    {
+      warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
+		  "unsupported simdlen %d", clonei->simdlen);
+      return 0;
+    }
+
+  tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
+  if (TREE_CODE (ret_type) != VOID_TYPE)
+    switch (TYPE_MODE (ret_type))
+      {
+      case E_QImode:
+      case E_HImode:
+      case E_SImode:
+      case E_DImode:
+      case E_SFmode:
+      case E_DFmode:
+	/* E_SCmode and E_DCmode are not in this list, so complex types
+	   are reported as unsupported below.  */
+	break;
+      default:
+	warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
+		    "unsupported return type %qT for simd", ret_type);
+	return 0;
+      }
+
+  /* Every argument must have one of the supported scalar modes.  */
+  for (tree t = DECL_ARGUMENTS (node->decl); t; t = DECL_CHAIN (t))
+    /* FIXME: Shouldn't we allow such arguments if they are uniform?  */
+    switch (TYPE_MODE (TREE_TYPE (t)))
+      {
+      case E_QImode:
+      case E_HImode:
+      case E_SImode:
+      case E_DImode:
+      case E_SFmode:
+      case E_DFmode:
+	/* E_SCmode and E_DCmode are not in this list, so complex types
+	   are reported as unsupported below.  */
+	break;
+      default:
+	warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
+		    "unsupported argument type %qT for simd", TREE_TYPE (t));
+	return 0;
+      }
+
+  if (TARGET_SIMD)
+    {
+      clonei->vecsize_mangle = 'n';
+      clonei->mask_mode = VOIDmode;
+      clonei->vecsize_int = 128;
+      clonei->vecsize_float = 128;
+
+      if (clonei->simdlen == 0)
+	{
+	  if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
+	    clonei->simdlen = clonei->vecsize_int;
+	  else
+	    clonei->simdlen = clonei->vecsize_float;
+	  clonei->simdlen /= GET_MODE_BITSIZE (SCALAR_TYPE_MODE (base_type));
+	}
+      else if (clonei->simdlen > 16)
+	{
+	  /* CNT is the number of vectors needed for SIMDLEN BASE_TYPE values;
+	     warn and emit no clone if it exceeds the 8 registers v0-v7.  */
+	  int cnt = (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (base_type))
+		     * clonei->simdlen);
+	  if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
+	    cnt /= clonei->vecsize_int;
+	  else
+	    cnt /= clonei->vecsize_float;
+	  if (cnt > 8)
+	    {
+	      warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
+			  "unsupported simdlen %d", clonei->simdlen);
+	      return 0;
+	    }
+	}
+      ret = 1;
+    }
+  return ret;
+}
+
+/* Implement TARGET_SIMD_CLONE_ADJUST.  Currently leaves NODE unchanged.  */
+
+static void
+aarch64_simd_clone_adjust (struct cgraph_node *node ATTRIBUTE_UNUSED)
+{
+}
+
+/* Return the badness of using SIMD clone NODE in a vectorized loop of the
+   current function: 0 for the most desirable vecsize_mangle variant,
+   larger values for less desirable ones, or -1 if the clone cannot be
+   used at all.  */
+
+static int
+aarch64_simd_clone_usable (struct cgraph_node *node)
+{
+  switch (node->simdclone->vecsize_mangle)
+    {
+    case 'n':
+      /* An 'n' clone is usable only when TARGET_SIMD is enabled; it is
+	 the only variant handled here, so its badness is 0.  */
+      return TARGET_SIMD ? 0 : -1;
+    default:
+      gcc_unreachable ();
+    }
+}
+
 /* Target-specific selftests.  */
 
 #if CHECKING_P
@@ -18313,6 +18439,16 @@ aarch64_libgcc_floating_mode_supported_p
 #undef TARGET_SPECULATION_SAFE_VALUE
 #define TARGET_SPECULATION_SAFE_VALUE aarch64_speculation_safe_value
 
+#undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
+#define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
+  aarch64_simd_clone_compute_vecsize_and_simdlen
+
+#undef TARGET_SIMD_CLONE_ADJUST
+#define TARGET_SIMD_CLONE_ADJUST aarch64_simd_clone_adjust
+
+#undef TARGET_SIMD_CLONE_USABLE
+#define TARGET_SIMD_CLONE_USABLE aarch64_simd_clone_usable
+
 #if CHECKING_P
 #undef TARGET_RUN_TARGET_SELFTESTS
 #define TARGET_RUN_TARGET_SELFTESTS selftest::aarch64_run_selftests

Reply via email to