This is patch 2 to support the AArch64 SIMD ABI [1] in GCC. It defines the TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN, TARGET_SIMD_CLONE_ADJUST, and TARGET_SIMD_CLONE_USABLE macros so that GCC can generate SIMD clones on AArch64.
Steve Ellcey
sell...@cavium.com

2018-11-08  Steve Ellcey  <sell...@cavium.com>

	* config/aarch64/aarch64.c (cgraph.h): New include.
	(aarch64_simd_clone_compute_vecsize_and_simdlen): New function.
	(aarch64_simd_clone_adjust): Ditto.
	(aarch64_simd_clone_usable): Ditto.
	(TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN): New macro.
	(TARGET_SIMD_CLONE_ADJUST): Ditto.
	(TARGET_SIMD_CLONE_USABLE): Ditto.
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index c82c7b6..cccf961 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -40,6 +40,7 @@
 #include "regs.h"
 #include "emit-rtl.h"
 #include "recog.h"
+#include "cgraph.h"
 #include "diagnostic.h"
 #include "insn-attr.h"
 #include "alias.h"
@@ -17834,6 +17835,148 @@ aarch64_speculation_safe_value (machine_mode mode,
   return result;
 }
 
+/* Implement TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN.
+
+   Set CLONEI->vecsize_mangle, CLONEI->mask_mode, CLONEI->vecsize_int,
+   CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
+   CLONEI->simdlen.  NODE is the function being cloned, BASE_TYPE is the
+   type whose size determines the default simdlen, and NUM is unused.
+
+   Warn and return 0 if the requested simdlen, the return type or an
+   argument type is unsupported; return 0 without a warning if
+   !TARGET_SIMD.  Otherwise return the number of vecsize_mangle variants
+   that should be emitted, which is always 1 here.  */
+
+static int
+aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
+                                        struct cgraph_simd_clone *clonei,
+                                        tree base_type,
+                                        int num ATTRIBUTE_UNUSED)
+{
+  int ret = 0;
+
+  /* An explicit simdlen must be a power of two in the range [2, 1024].  */
+  if (clonei->simdlen
+      && (clonei->simdlen < 2
+          || clonei->simdlen > 1024
+          || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
+    {
+      warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
+                  "unsupported simdlen %d", clonei->simdlen);
+      return 0;
+    }
+
+  /* Only these scalar modes are accepted for the return value.  */
+  tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
+  if (TREE_CODE (ret_type) != VOID_TYPE)
+    switch (TYPE_MODE (ret_type))
+      {
+      case E_QImode:
+      case E_HImode:
+      case E_SImode:
+      case E_DImode:
+      case E_SFmode:
+      case E_DFmode:
+      /* case E_SCmode: */
+      /* case E_DCmode: */
+        break;
+      default:
+        warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
+                    "unsupported return type %qT for simd", ret_type);
+        return 0;
+      }
+
+  tree t;
+  for (t = DECL_ARGUMENTS (node->decl); t; t = DECL_CHAIN (t))
+    /* FIXME: Shouldn't we allow such arguments if they are uniform?  */
+    switch (TYPE_MODE (TREE_TYPE (t)))
+      {
+      case E_QImode:
+      case E_HImode:
+      case E_SImode:
+      case E_DImode:
+      case E_SFmode:
+      case E_DFmode:
+      /* case E_SCmode: */
+      /* case E_DCmode: */
+        break;
+      default:
+        warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
+                    "unsupported argument type %qT for simd", TREE_TYPE (t));
+        return 0;
+      }
+
+  /* A single variant is provided: 'n' mangling, no mask mode and 128-bit
+     vectors.  Without TARGET_SIMD no clone is emitted (RET stays 0).  */
+  if (TARGET_SIMD)
+    {
+      clonei->vecsize_mangle = 'n';
+      clonei->mask_mode = VOIDmode;
+      clonei->vecsize_int = 128;
+      clonei->vecsize_float = 128;
+
+      if (clonei->simdlen == 0)
+        {
+          /* Default: as many BASE_TYPE elements as fit in one vector.  */
+          if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
+            clonei->simdlen = clonei->vecsize_int;
+          else
+            clonei->simdlen = clonei->vecsize_float;
+          clonei->simdlen /= GET_MODE_BITSIZE (SCALAR_TYPE_MODE (base_type));
+        }
+      else if (clonei->simdlen > 16)
+        {
+          /* If it is possible for given SIMDLEN to pass BASE_TYPE value in
+             registers (v0-v7) accept that SIMDLEN, otherwise warn and don't
+             emit corresponding clone.  CNT is the number of vectors needed
+             to hold SIMDLEN elements of BASE_TYPE.  */
+          int cnt = (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (base_type))
+                     * clonei->simdlen);
+          if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
+            cnt /= clonei->vecsize_int;
+          else
+            cnt /= clonei->vecsize_float;
+          if (cnt > 8)
+            {
+              warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
+                          "unsupported simdlen %d", clonei->simdlen);
+              return 0;
+            }
+        }
+      ret = 1;
+    }
+  return ret;
+}
+
+/* Implement TARGET_SIMD_CLONE_ADJUST.  Add target attribute to SIMD clone
+   NODE if needed.  Nothing is added at present, so the body is empty.  */
+
+static void
+aarch64_simd_clone_adjust (struct cgraph_node *node ATTRIBUTE_UNUSED)
+{
+}
+
+/* Implement TARGET_SIMD_CLONE_USABLE.  If SIMD clone NODE can't be used
+   in a vectorized loop in current function, return -1, otherwise return
+   a badness of using it (0 if it is most desirable from vecsize_mangle
+   point of view, 1 slightly less desirable, etc.).  'n' is the only
+   mangling aarch64_simd_clone_compute_vecsize_and_simdlen sets, so any
+   other value is treated as unreachable.  */
+
+static int
+aarch64_simd_clone_usable (struct cgraph_node *node)
+{
+  switch (node->simdclone->vecsize_mangle)
+    {
+    case 'n':
+      if (!TARGET_SIMD)
+        return -1;
+      return 0;
+    default:
+      gcc_unreachable ();
+    }
+}
+
 /* Target-specific selftests.  */
 
 #if CHECKING_P
@@ -18313,6 +18456,16 @@ aarch64_libgcc_floating_mode_supported_p
 #undef TARGET_SPECULATION_SAFE_VALUE
 #define TARGET_SPECULATION_SAFE_VALUE aarch64_speculation_safe_value
 
+#undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
+#define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
+  aarch64_simd_clone_compute_vecsize_and_simdlen
+
+#undef TARGET_SIMD_CLONE_ADJUST
+#define TARGET_SIMD_CLONE_ADJUST aarch64_simd_clone_adjust
+
+#undef TARGET_SIMD_CLONE_USABLE
+#define TARGET_SIMD_CLONE_USABLE aarch64_simd_clone_usable
+
 #if CHECKING_P
 #undef TARGET_RUN_TARGET_SELFTESTS
 #define TARGET_RUN_TARGET_SELFTESTS selftest::aarch64_run_selftests