On Wed, 2021-09-01 at 11:13 -0500, Bill Schmidt via Gcc-patches wrote: Hi, Just a couple cosmetic nits noted below, the majority of which is also in the original code this is based on. Thanks -Will
> Although this patch looks quite large, the changes are fairly minimal. > Most of it is duplicating the large function that does the overload > resolution using the automatically generated data structures instead of > the old hand-generated ones. This doesn't make the patch terribly easy to > review, unfortunately. Just be aware that generally we aren't changing > the logic and functionality of overload handling. ok > > 2021-08-31 Bill Schmidt <wschm...@linux.ibm.com> > > gcc/ > * config/rs6000/rs6000-c.c (rs6000-builtins.h): New include. > (altivec_resolve_new_overloaded_builtin): New forward decl. > (rs6000_new_builtin_type_compatible): New function. > (altivec_resolve_overloaded_builtin): Call > altivec_resolve_new_overloaded_builtin. > (altivec_build_new_resolved_builtin): New function. > (altivec_resolve_new_overloaded_builtin): Likewise. > * config/rs6000/rs6000-call.c (rs6000_new_builtin_is_supported): > Likewise. > * config/rs6000/rs6000-gen-builtins.c (write_decls): Remove _p from > name of rs6000_new_builtin_is_supported. ok > --- > gcc/config/rs6000/rs6000-c.c | 1088 +++++++++++++++++++++++ > gcc/config/rs6000/rs6000-call.c | 53 ++ > gcc/config/rs6000/rs6000-gen-builtins.c | 2 +- > 3 files changed, 1142 insertions(+), 1 deletion(-) > > diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c > index afcb5bb6e39..aafb4e6a98f 100644 > --- a/gcc/config/rs6000/rs6000-c.c > +++ b/gcc/config/rs6000/rs6000-c.c > @@ -35,6 +35,9 @@ > #include "langhooks.h" > #include "c/c-tree.h" > > +#include "rs6000-builtins.h" > + > +static tree altivec_resolve_new_overloaded_builtin (location_t, tree, void > *); > > > /* Handle the machine specific pragma longcall. 
Its syntax is > @@ -811,6 +814,30 @@ is_float128_p (tree t) > && t == long_double_type_node)); > } > > +static bool > +rs6000_new_builtin_type_compatible (tree t, tree u) > +{ > + if (t == error_mark_node) > + return false; > + > + if (INTEGRAL_TYPE_P (t) && INTEGRAL_TYPE_P (u)) > + return true; > + > + if (TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128 > + && is_float128_p (t) && is_float128_p (u)) > + return true; > + > + if (POINTER_TYPE_P (t) && POINTER_TYPE_P (u)) > + { > + t = TREE_TYPE (t); > + u = TREE_TYPE (u); > + if (TYPE_READONLY (u)) > + t = build_qualified_type (t, TYPE_QUAL_CONST); > + } > + > + return lang_hooks.types_compatible_p (t, u); > +} > + ok > static inline bool > rs6000_builtin_type_compatible (tree t, int id) > { > @@ -927,6 +954,10 @@ tree > altivec_resolve_overloaded_builtin (location_t loc, tree fndecl, > void *passed_arglist) > { > + if (new_builtins_are_live) > + return altivec_resolve_new_overloaded_builtin (loc, fndecl, > + passed_arglist); > + > vec<tree, va_gc> *arglist = static_cast<vec<tree, va_gc> *> > (passed_arglist); > unsigned int nargs = vec_safe_length (arglist); > enum rs6000_builtins fcode ok > @@ -1930,3 +1961,1060 @@ altivec_resolve_overloaded_builtin (location_t loc, > tree fndecl, > return error_mark_node; > } > } > + > +/* Build a tree for a function call to an Altivec non-overloaded builtin. > + The overloaded builtin that matched the types and args is described > + by DESC. The N arguments are given in ARGS, respectively. > + > + Actually the only thing it does is calling fold_convert on ARGS, with > + a small exception for vec_{all,any}_{ge,le} predicates. 
*/ > + > +static tree > +altivec_build_new_resolved_builtin (tree *args, int n, tree fntype, > + tree ret_type, > + rs6000_gen_builtins bif_id, > + rs6000_gen_builtins ovld_id) > +{ > + tree argtypes = TYPE_ARG_TYPES (fntype); > + tree arg_type[MAX_OVLD_ARGS]; > + tree fndecl = rs6000_builtin_decls_x[bif_id]; > + tree call; > + > + for (int i = 0; i < n; i++) > + arg_type[i] = TREE_VALUE (argtypes), argtypes = TREE_CHAIN (argtypes); > + > + /* The AltiVec overloading implementation is overall gross, but this > + is particularly disgusting. The vec_{all,any}_{ge,le} builtins > + are completely different for floating-point vs. integer vector > + types, because the former has vcmpgefp, but the latter should use > + vcmpgtXX. > + > + In practice, the second and third arguments are swapped, and the > + condition (LT vs. EQ, which is recognizable by bit 1 of the first > + argument) is reversed. Patch the arguments here before building > + the resolved CALL_EXPR. */ > + if (n == 3 > + && ovld_id == RS6000_OVLD_VEC_CMPGE_P > + && bif_id != RS6000_BIF_VCMPGEFP_P > + && bif_id != RS6000_BIF_XVCMPGEDP_P) > + { > + std::swap (args[1], args[2]); > + std::swap (arg_type[1], arg_type[2]); > + > + args[0] = fold_build2 (BIT_XOR_EXPR, TREE_TYPE (args[0]), args[0], > + build_int_cst (NULL_TREE, 2)); > + } > + > + /* If the number of arguments to an overloaded function increases, > + we must expand this switch. */ > + gcc_assert (MAX_OVLD_ARGS <= 4); Ok. 
> + > + switch (n) > + { > + case 0: > + call = build_call_expr (fndecl, 0); > + break; > + case 1: > + call = build_call_expr (fndecl, 1, > + fully_fold_convert (arg_type[0], args[0])); > + break; > + case 2: > + call = build_call_expr (fndecl, 2, > + fully_fold_convert (arg_type[0], args[0]), > + fully_fold_convert (arg_type[1], args[1])); > + break; > + case 3: > + call = build_call_expr (fndecl, 3, > + fully_fold_convert (arg_type[0], args[0]), > + fully_fold_convert (arg_type[1], args[1]), > + fully_fold_convert (arg_type[2], args[2])); > + break; > + case 4: > + call = build_call_expr (fndecl, 4, > + fully_fold_convert (arg_type[0], args[0]), > + fully_fold_convert (arg_type[1], args[1]), > + fully_fold_convert (arg_type[2], args[2]), > + fully_fold_convert (arg_type[3], args[3])); > + break; > + default: > + gcc_unreachable (); > + } > + return fold_convert (ret_type, call); > +} > + > +/* Implementation of the resolve_overloaded_builtin target hook, to > + support Altivec's overloaded builtins. */ > + > +static tree > +altivec_resolve_new_overloaded_builtin (location_t loc, tree fndecl, > + void *passed_arglist) > +{ > + vec<tree, va_gc> *arglist = static_cast<vec<tree, va_gc> *> > (passed_arglist); > + unsigned int nargs = vec_safe_length (arglist); > + enum rs6000_gen_builtins fcode > + = (enum rs6000_gen_builtins) DECL_MD_FUNCTION_CODE (fndecl); > + tree fnargs = TYPE_ARG_TYPES (TREE_TYPE (fndecl)); > + tree types[MAX_OVLD_ARGS], args[MAX_OVLD_ARGS]; > + unsigned int n; > + > + /* Return immediately if this isn't an overload. */ > + if (fcode <= RS6000_OVLD_NONE) > + return NULL_TREE; > + > + unsigned int adj_fcode = fcode - RS6000_OVLD_NONE; > + > + if (TARGET_DEBUG_BUILTIN) > + fprintf (stderr, "altivec_resolve_overloaded_builtin, code = %4d, %s\n", > + (int) fcode, IDENTIFIER_POINTER (DECL_NAME (fndecl))); > + > + /* vec_lvsl and vec_lvsr are deprecated for use with LE element order. 
*/ > + if (fcode == RS6000_OVLD_VEC_LVSL && !BYTES_BIG_ENDIAN) > + warning (OPT_Wdeprecated, > + "%<vec_lvsl%> is deprecated for little endian; use " > + "assignment for unaligned loads and stores"); > + else if (fcode == RS6000_OVLD_VEC_LVSR && !BYTES_BIG_ENDIAN) > + warning (OPT_Wdeprecated, > + "%<vec_lvsr%> is deprecated for little endian; use " > + "assignment for unaligned loads and stores"); > + > + if (fcode == RS6000_OVLD_VEC_MUL) > + { > + /* vec_mul needs to be special cased because there are no instructions > + for it for the {un}signed char, {un}signed short, and {un}signed int > + types. */ > + if (nargs != 2) > + { > + error ("builtin %qs only accepts 2 arguments", "vec_mul"); > + return error_mark_node; > + } > + > + tree arg0 = (*arglist)[0]; > + tree arg0_type = TREE_TYPE (arg0); > + tree arg1 = (*arglist)[1]; > + tree arg1_type = TREE_TYPE (arg1); > + > + /* Both arguments must be vectors and the types must be compatible. */ > + if (TREE_CODE (arg0_type) != VECTOR_TYPE) > + goto bad; > + if (!lang_hooks.types_compatible_p (arg0_type, arg1_type)) > + goto bad; > + > + switch (TYPE_MODE (TREE_TYPE (arg0_type))) > + { > + case E_QImode: > + case E_HImode: > + case E_SImode: > + case E_DImode: > + case E_TImode: > + { > + /* For scalar types just use a multiply expression. */ > + return fold_build2_loc (loc, MULT_EXPR, TREE_TYPE (arg0), arg0, > + fold_convert (TREE_TYPE (arg0), arg1)); > + } > + case E_SFmode: > + { > + /* For floats use the xvmulsp instruction directly. */ > + tree call = rs6000_builtin_decls_x[RS6000_BIF_XVMULSP]; > + return build_call_expr (call, 2, arg0, arg1); > + } > + case E_DFmode: > + { > + /* For doubles use the xvmuldp instruction directly. */ > + tree call = rs6000_builtin_decls_x[RS6000_BIF_XVMULDP]; > + return build_call_expr (call, 2, arg0, arg1); > + } > + /* Other types are errors. 
*/ > + default: > + goto bad; > + } > + } > + > + if (fcode == RS6000_OVLD_VEC_CMPNE) > + { > + /* vec_cmpne needs to be special cased because there are no > instructions > + for it (prior to power 9). */ > + if (nargs != 2) > + { > + error ("builtin %qs only accepts 2 arguments", "vec_cmpne"); > + return error_mark_node; > + } > + > + tree arg0 = (*arglist)[0]; > + tree arg0_type = TREE_TYPE (arg0); > + tree arg1 = (*arglist)[1]; > + tree arg1_type = TREE_TYPE (arg1); > + > + /* Both arguments must be vectors and the types must be compatible. */ > + if (TREE_CODE (arg0_type) != VECTOR_TYPE) > + goto bad; > + if (!lang_hooks.types_compatible_p (arg0_type, arg1_type)) > + goto bad; > + > + /* Power9 instructions provide the most efficient implementation of > + ALTIVEC_BUILTIN_VEC_CMPNE if the mode is not DImode or TImode > + or SFmode or DFmode. */ > + if (!TARGET_P9_VECTOR > + || (TYPE_MODE (TREE_TYPE (arg0_type)) == DImode) > + || (TYPE_MODE (TREE_TYPE (arg0_type)) == TImode) > + || (TYPE_MODE (TREE_TYPE (arg0_type)) == SFmode) > + || (TYPE_MODE (TREE_TYPE (arg0_type)) == DFmode)) > + { > + switch (TYPE_MODE (TREE_TYPE (arg0_type))) > + { > + /* vec_cmpneq (va, vb) == vec_nor (vec_cmpeq (va, vb), > + vec_cmpeq (va, vb)). */ > + /* Note: vec_nand also works but opt changes vec_nand's > + to vec_nor's anyway. */ > + case E_QImode: > + case E_HImode: > + case E_SImode: > + case E_DImode: > + case E_TImode: > + case E_SFmode: > + case E_DFmode: > + { > + /* call = vec_cmpeq (va, vb) > + result = vec_nor (call, call). */ > + vec<tree, va_gc> *params = make_tree_vector (); > + vec_safe_push (params, arg0); > + vec_safe_push (params, arg1); > + tree call = altivec_resolve_new_overloaded_builtin > + (loc, rs6000_builtin_decls_x[RS6000_OVLD_VEC_CMPEQ], > + params); > + /* Use save_expr to ensure that operands used more than once > + that may have side effects (like calls) are only evaluated > + once. 
*/ > + call = save_expr (call); > + params = make_tree_vector (); > + vec_safe_push (params, call); > + vec_safe_push (params, call); > + return altivec_resolve_new_overloaded_builtin > + (loc, rs6000_builtin_decls_x[RS6000_OVLD_VEC_NOR], params); > + } > + /* Other types are errors. */ > + default: > + goto bad; > + } > + } > + /* else, fall through and process the Power9 alternative below */ > + } > + > + if (fcode == RS6000_OVLD_VEC_ADDE || fcode == RS6000_OVLD_VEC_SUBE) > + { > + /* vec_adde needs to be special cased because there is no instruction > + for the {un}signed int version. */ > + if (nargs != 3) > + { > + const char *name; > + name = fcode == RS6000_OVLD_VEC_ADDE ? "vec_adde" : "vec_sube"; > + error ("builtin %qs only accepts 3 arguments", name); > + return error_mark_node; > + } > + > + tree arg0 = (*arglist)[0]; > + tree arg0_type = TREE_TYPE (arg0); > + tree arg1 = (*arglist)[1]; > + tree arg1_type = TREE_TYPE (arg1); > + tree arg2 = (*arglist)[2]; > + tree arg2_type = TREE_TYPE (arg2); > + > + /* All 3 arguments must be vectors of (signed or unsigned) (int or > + __int128) and the types must be compatible. */ > + if (TREE_CODE (arg0_type) != VECTOR_TYPE) > + goto bad; > + if (!lang_hooks.types_compatible_p (arg0_type, arg1_type) > + || !lang_hooks.types_compatible_p (arg1_type, arg2_type)) > + goto bad; > + > + switch (TYPE_MODE (TREE_TYPE (arg0_type))) > + { > + /* For {un}signed ints, > + vec_adde (va, vb, carryv) == vec_add (vec_add (va, vb), > + vec_and (carryv, 1)). > + vec_sube (va, vb, carryv) == vec_sub (vec_sub (va, vb), > + vec_and (carryv, 1)). */ Also commented out in the original code. Since it's dead code, maybe worth enhancing the comment to clarify why this is disabled? 
> + case E_SImode: > + { > + tree add_sub_builtin; > + > + vec<tree, va_gc> *params = make_tree_vector (); > + vec_safe_push (params, arg0); > + vec_safe_push (params, arg1); > + > + if (fcode == RS6000_OVLD_VEC_ADDE) > + add_sub_builtin = rs6000_builtin_decls_x[RS6000_OVLD_VEC_ADD]; > + else > + add_sub_builtin = rs6000_builtin_decls_x[RS6000_OVLD_VEC_SUB]; > + > + tree call > + = altivec_resolve_new_overloaded_builtin (loc, > + add_sub_builtin, > + params); > + tree const1 = build_int_cstu (TREE_TYPE (arg0_type), 1); > + tree ones_vector = build_vector_from_val (arg0_type, const1); > + tree and_expr = fold_build2_loc (loc, BIT_AND_EXPR, arg0_type, > + arg2, ones_vector); > + params = make_tree_vector (); > + vec_safe_push (params, call); > + vec_safe_push (params, and_expr); > + return altivec_resolve_new_overloaded_builtin (loc, > + add_sub_builtin, > + params); > + } > + /* For {un}signed __int128s use the vaddeuqm/vsubeuqm instruction > + directly. */ > + case E_TImode: > + break; > + > + /* Types other than {un}signed int and {un}signed __int128 > + are errors. */ > + default: > + goto bad; > + } > + } > + > + if (fcode == RS6000_OVLD_VEC_ADDEC || fcode == RS6000_OVLD_VEC_SUBEC) > + { > + /* vec_addec and vec_subec needs to be special cased because there is > + no instruction for the {un}signed int version. */ > + if (nargs != 3) > + { > + const char *name; > + name = fcode == RS6000_OVLD_VEC_ADDEC ? "vec_addec" : "vec_subec"; > + error ("builtin %qs only accepts 3 arguments", name); > + return error_mark_node; > + } > + > + tree arg0 = (*arglist)[0]; > + tree arg0_type = TREE_TYPE (arg0); > + tree arg1 = (*arglist)[1]; > + tree arg1_type = TREE_TYPE (arg1); > + tree arg2 = (*arglist)[2]; > + tree arg2_type = TREE_TYPE (arg2); > + > + /* All 3 arguments must be vectors of (signed or unsigned) (int or > + __int128) and the types must be compatible. 
*/ > + if (TREE_CODE (arg0_type) != VECTOR_TYPE) > + goto bad; > + if (!lang_hooks.types_compatible_p (arg0_type, arg1_type) > + || !lang_hooks.types_compatible_p (arg1_type, arg2_type)) > + goto bad; > + > + switch (TYPE_MODE (TREE_TYPE (arg0_type))) > + { > + /* For {un}signed ints, > + vec_addec (va, vb, carryv) == > + vec_or (vec_addc (va, vb), > + vec_addc (vec_add (va, vb), > + vec_and (carryv, 0x1))). */ similar here. > + case E_SImode: > + { > + /* Use save_expr to ensure that operands used more than once > + that may have side effects (like calls) are only evaluated > + once. */ > + tree as_builtin; > + tree as_c_builtin; > + > + arg0 = save_expr (arg0); > + arg1 = save_expr (arg1); > + vec<tree, va_gc> *params = make_tree_vector (); > + vec_safe_push (params, arg0); > + vec_safe_push (params, arg1); > + > + if (fcode == RS6000_OVLD_VEC_ADDEC) > + as_c_builtin = rs6000_builtin_decls_x[RS6000_OVLD_VEC_ADDC]; > + else > + as_c_builtin = rs6000_builtin_decls_x[RS6000_OVLD_VEC_SUBC]; > + > + tree call1 = altivec_resolve_new_overloaded_builtin (loc, > + as_c_builtin, > + params); > + params = make_tree_vector (); > + vec_safe_push (params, arg0); > + vec_safe_push (params, arg1); > + > + extra blank line? 
> + if (fcode == RS6000_OVLD_VEC_ADDEC) > + as_builtin = rs6000_builtin_decls_x[RS6000_OVLD_VEC_ADD]; > + else > + as_builtin = rs6000_builtin_decls_x[RS6000_OVLD_VEC_SUB]; > + > + tree call2 = altivec_resolve_new_overloaded_builtin (loc, > + as_builtin, > + params); > + tree const1 = build_int_cstu (TREE_TYPE (arg0_type), 1); > + tree ones_vector = build_vector_from_val (arg0_type, const1); > + tree and_expr = fold_build2_loc (loc, BIT_AND_EXPR, arg0_type, > + arg2, ones_vector); > + params = make_tree_vector (); > + vec_safe_push (params, call2); > + vec_safe_push (params, and_expr); > + call2 = altivec_resolve_new_overloaded_builtin (loc, as_c_builtin, > + params); > + params = make_tree_vector (); > + vec_safe_push (params, call1); > + vec_safe_push (params, call2); > + tree or_builtin = rs6000_builtin_decls_x[RS6000_OVLD_VEC_OR]; > + return altivec_resolve_new_overloaded_builtin (loc, or_builtin, > + params); > + } > + /* For {un}signed __int128s use the vaddecuq/vsubbecuq > + instructions. This occurs through normal processing. */ > + case E_TImode: > + break; > + > + /* Types other than {un}signed int and {un}signed __int128 > + are errors. */ > + default: > + goto bad; > + } > + } ok > + > + /* For now treat vec_splats and vec_promote as the same. */ > + if (fcode == RS6000_OVLD_VEC_SPLATS || fcode == RS6000_OVLD_VEC_PROMOTE) > + { > + tree type, arg; > + int size; > + int i; > + bool unsigned_p; > + vec<constructor_elt, va_gc> *vec; > + const char *name; > + name = fcode == RS6000_OVLD_VEC_SPLATS ? "vec_splats" : "vec_promote"; > + > + if (fcode == RS6000_OVLD_VEC_SPLATS && nargs != 1) > + { > + error ("builtin %qs only accepts 1 argument", name); > + return error_mark_node; > + } > + if (fcode == RS6000_OVLD_VEC_PROMOTE && nargs != 2) > + { > + error ("builtin %qs only accepts 2 arguments", name); > + return error_mark_node; > + } > + /* Ignore promote's element argument. 
*/ > + if (fcode == RS6000_OVLD_VEC_PROMOTE > + && !INTEGRAL_TYPE_P (TREE_TYPE ((*arglist)[1]))) > + goto bad; > + > + arg = (*arglist)[0]; > + type = TREE_TYPE (arg); > + if (!SCALAR_FLOAT_TYPE_P (type) > + && !INTEGRAL_TYPE_P (type)) > + goto bad; > + unsigned_p = TYPE_UNSIGNED (type); > + switch (TYPE_MODE (type)) > + { > + case E_TImode: > + type = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node); > + size = 1; > + break; > + case E_DImode: > + type = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node); > + size = 2; > + break; > + case E_SImode: > + type = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node); > + size = 4; > + break; > + case E_HImode: > + type = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node); > + size = 8; > + break; > + case E_QImode: > + type = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node); > + size = 16; > + break; > + case E_SFmode: > + type = V4SF_type_node; > + size = 4; > + break; > + case E_DFmode: > + type = V2DF_type_node; > + size = 2; > + break; > + default: > + goto bad; > + } > + arg = save_expr (fold_convert (TREE_TYPE (type), arg)); > + vec_alloc (vec, size); > + for (i = 0; i < size; i++) > + { > + constructor_elt elt = {NULL_TREE, arg}; > + vec->quick_push (elt); > + } > + return build_constructor (type, vec); > + } > + > + /* For now use pointer tricks to do the extraction, unless we are on VSX > + extracting a double from a constant offset. */ > + if (fcode == RS6000_OVLD_VEC_EXTRACT) > + { > + tree arg1; > + tree arg1_type; > + tree arg2; > + tree arg1_inner_type; > + tree decl, stmt; > + tree innerptrtype; > + machine_mode mode; > + > + /* No second argument. 
*/ > + if (nargs != 2) > + { > + error ("builtin %qs only accepts 2 arguments", "vec_extract"); > + return error_mark_node; > + } > + > + arg2 = (*arglist)[1]; > + arg1 = (*arglist)[0]; > + arg1_type = TREE_TYPE (arg1); > + > + if (TREE_CODE (arg1_type) != VECTOR_TYPE) > + goto bad; > + if (!INTEGRAL_TYPE_P (TREE_TYPE (arg2))) > + goto bad; > + > + /* See if we can optimize vec_extracts with the current VSX instruction > + set. */ > + mode = TYPE_MODE (arg1_type); > + if (VECTOR_MEM_VSX_P (mode)) > + > + { > + tree call = NULL_TREE; > + int nunits = GET_MODE_NUNITS (mode); > + > + arg2 = fold_for_warn (arg2); > + > + /* If the second argument is an integer constant, generate > + the built-in code if we can. We need 64-bit and direct > + move to extract the small integer vectors. */ > + if (TREE_CODE (arg2) == INTEGER_CST) > + { > + wide_int selector = wi::to_wide (arg2); > + selector = wi::umod_trunc (selector, nunits); > + arg2 = wide_int_to_tree (TREE_TYPE (arg2), selector); > + switch (mode) > + { > + default: > + break; > + > + case E_V1TImode: > + call = rs6000_builtin_decls_x[RS6000_BIF_VEC_EXT_V1TI]; > + break; > + > + case E_V2DFmode: > + call = rs6000_builtin_decls_x[RS6000_BIF_VEC_EXT_V2DF]; > + break; > + > + case E_V2DImode: > + call = rs6000_builtin_decls_x[RS6000_BIF_VEC_EXT_V2DI]; > + break; > + > + case E_V4SFmode: > + call = rs6000_builtin_decls_x[RS6000_BIF_VEC_EXT_V4SF]; > + break; > + > + case E_V4SImode: > + if (TARGET_DIRECT_MOVE_64BIT) > + call = rs6000_builtin_decls_x[RS6000_BIF_VEC_EXT_V4SI]; > + break; > + > + case E_V8HImode: > + if (TARGET_DIRECT_MOVE_64BIT) > + call = rs6000_builtin_decls_x[RS6000_BIF_VEC_EXT_V8HI]; > + break; > + > + case E_V16QImode: > + if (TARGET_DIRECT_MOVE_64BIT) > + call = rs6000_builtin_decls_x[RS6000_BIF_VEC_EXT_V16QI]; > + break; > + } > + } > + > + /* If the second argument is variable, we can optimize it if we are > + generating 64-bit code on a machine with direct move. 
*/ > + else if (TREE_CODE (arg2) != INTEGER_CST && TARGET_DIRECT_MOVE_64BIT) > + { > + switch (mode) > + { > + default: > + break; > + > + case E_V2DFmode: > + call = rs6000_builtin_decls_x[RS6000_BIF_VEC_EXT_V2DF]; > + break; > + > + case E_V2DImode: > + call = rs6000_builtin_decls_x[RS6000_BIF_VEC_EXT_V2DI]; > + break; > + > + case E_V4SFmode: > + call = rs6000_builtin_decls_x[RS6000_BIF_VEC_EXT_V4SF]; > + break; > + > + case E_V4SImode: > + call = rs6000_builtin_decls_x[RS6000_BIF_VEC_EXT_V4SI]; > + break; > + > + case E_V8HImode: > + call = rs6000_builtin_decls_x[RS6000_BIF_VEC_EXT_V8HI]; > + break; > + > + case E_V16QImode: > + call = rs6000_builtin_decls_x[RS6000_BIF_VEC_EXT_V16QI]; > + break; > + } > + } > + > + if (call) > + { > + tree result = build_call_expr (call, 2, arg1, arg2); > + /* Coerce the result to vector element type. May be no-op. */ > + arg1_inner_type = TREE_TYPE (arg1_type); > + result = fold_convert (arg1_inner_type, result); > + return result; > + } > + } > + > + /* Build *(((arg1_inner_type*)&(vector type){arg1})+arg2). 
*/ > + arg1_inner_type = TREE_TYPE (arg1_type); > + arg2 = build_binary_op (loc, BIT_AND_EXPR, arg2, > + build_int_cst (TREE_TYPE (arg2), > + TYPE_VECTOR_SUBPARTS (arg1_type) > + - 1), 0); > + decl = build_decl (loc, VAR_DECL, NULL_TREE, arg1_type); > + DECL_EXTERNAL (decl) = 0; > + TREE_PUBLIC (decl) = 0; > + DECL_CONTEXT (decl) = current_function_decl; > + TREE_USED (decl) = 1; > + TREE_TYPE (decl) = arg1_type; > + TREE_READONLY (decl) = TYPE_READONLY (arg1_type); > + if (c_dialect_cxx ()) > + { > + stmt = build4 (TARGET_EXPR, arg1_type, decl, arg1, > + NULL_TREE, NULL_TREE); > + SET_EXPR_LOCATION (stmt, loc); > + } > + else > + { > + DECL_INITIAL (decl) = arg1; > + stmt = build1 (DECL_EXPR, arg1_type, decl); > + TREE_ADDRESSABLE (decl) = 1; > + SET_EXPR_LOCATION (stmt, loc); > + stmt = build1 (COMPOUND_LITERAL_EXPR, arg1_type, stmt); > + } > + > + innerptrtype = build_pointer_type (arg1_inner_type); > + > + stmt = build_unary_op (loc, ADDR_EXPR, stmt, 0); > + stmt = convert (innerptrtype, stmt); > + stmt = build_binary_op (loc, PLUS_EXPR, stmt, arg2, 1); > + stmt = build_indirect_ref (loc, stmt, RO_NULL); > + > + /* PR83660: We mark this as having side effects so that > + downstream in fold_build_cleanup_point_expr () it will get a > + CLEANUP_POINT_EXPR. If it does not we can run into an ICE > + later in gimplify_cleanup_point_expr (). Potentially this > + causes missed optimization because there actually is no side > + effect. */ > + if (c_dialect_cxx ()) > + TREE_SIDE_EFFECTS (stmt) = 1; > + > + return stmt; > + } ok > + > + /* For now use pointer tricks to do the insertion, unless we are on VSX > + inserting a double to a constant offset.. */ Too many ending periods. :-) (also in original) > + if (fcode == RS6000_OVLD_VEC_INSERT) > + { > + tree arg0; > + tree arg1; > + tree arg2; > + tree arg1_type; > + tree decl, stmt; > + machine_mode mode; > + > + /* No second or third arguments. 
*/ > + if (nargs != 3) > + { > + error ("builtin %qs only accepts 3 arguments", "vec_insert"); > + return error_mark_node; > + } > + > + arg0 = (*arglist)[0]; > + arg1 = (*arglist)[1]; > + arg1_type = TREE_TYPE (arg1); > + arg2 = fold_for_warn ((*arglist)[2]); > + > + if (TREE_CODE (arg1_type) != VECTOR_TYPE) > + goto bad; > + if (!INTEGRAL_TYPE_P (TREE_TYPE (arg2))) > + goto bad; > + > + /* If we can use the VSX xxpermdi instruction, use that for insert. */ > + mode = TYPE_MODE (arg1_type); > + if ((mode == V2DFmode || mode == V2DImode) && VECTOR_UNIT_VSX_P (mode) > + && TREE_CODE (arg2) == INTEGER_CST) > + { > + wide_int selector = wi::to_wide (arg2); > + selector = wi::umod_trunc (selector, 2); > + tree call = NULL_TREE; > + > + arg2 = wide_int_to_tree (TREE_TYPE (arg2), selector); > + if (mode == V2DFmode) > + call = rs6000_builtin_decls_x[RS6000_BIF_VEC_SET_V2DF]; > + else if (mode == V2DImode) > + call = rs6000_builtin_decls_x[RS6000_BIF_VEC_SET_V2DI]; > + > + /* Note, __builtin_vec_insert_<xxx> has vector and scalar types > + reversed. */ > + if (call) > + return build_call_expr (call, 3, arg1, arg0, arg2); > + } > + else if (mode == V1TImode && VECTOR_UNIT_VSX_P (mode) > + && TREE_CODE (arg2) == INTEGER_CST) > + { > + tree call = rs6000_builtin_decls_x[RS6000_BIF_VEC_SET_V1TI]; > + wide_int selector = wi::zero(32); > + > + arg2 = wide_int_to_tree (TREE_TYPE (arg2), selector); > + /* Note, __builtin_vec_insert_<xxx> has vector and scalar types > + reversed. */ > + return build_call_expr (call, 3, arg1, arg0, arg2); > + } > + > + /* Build *(((arg1_inner_type*)&(vector type){arg1})+arg2) = arg0 with > + VIEW_CONVERT_EXPR. 
i.e.: > + D.3192 = v1; > + _1 = n & 3; > + VIEW_CONVERT_EXPR<int[4]>(D.3192)[_1] = i; > + v1 = D.3192; > + D.3194 = v1; */ > + if (TYPE_VECTOR_SUBPARTS (arg1_type) == 1) > + arg2 = build_int_cst (TREE_TYPE (arg2), 0); > + else > + arg2 = build_binary_op (loc, BIT_AND_EXPR, arg2, > + build_int_cst (TREE_TYPE (arg2), > + TYPE_VECTOR_SUBPARTS (arg1_type) > + - 1), 0); > + decl = build_decl (loc, VAR_DECL, NULL_TREE, arg1_type); > + DECL_EXTERNAL (decl) = 0; > + TREE_PUBLIC (decl) = 0; > + DECL_CONTEXT (decl) = current_function_decl; > + TREE_USED (decl) = 1; > + TREE_TYPE (decl) = arg1_type; > + TREE_READONLY (decl) = TYPE_READONLY (arg1_type); > + TREE_ADDRESSABLE (decl) = 1; > + if (c_dialect_cxx ()) > + { > + stmt = build4 (TARGET_EXPR, arg1_type, decl, arg1, > + NULL_TREE, NULL_TREE); > + SET_EXPR_LOCATION (stmt, loc); > + } > + else > + { > + DECL_INITIAL (decl) = arg1; > + stmt = build1 (DECL_EXPR, arg1_type, decl); > + SET_EXPR_LOCATION (stmt, loc); > + stmt = build1 (COMPOUND_LITERAL_EXPR, arg1_type, stmt); > + } > + > + if (TARGET_VSX) > + { > + stmt = build_array_ref (loc, stmt, arg2); > + stmt = fold_build2 (MODIFY_EXPR, TREE_TYPE (arg0), stmt, > + convert (TREE_TYPE (stmt), arg0)); > + stmt = build2 (COMPOUND_EXPR, arg1_type, stmt, decl); > + } > + else > + { > + tree arg1_inner_type; > + tree innerptrtype; > + arg1_inner_type = TREE_TYPE (arg1_type); > + innerptrtype = build_pointer_type (arg1_inner_type); > + > + stmt = build_unary_op (loc, ADDR_EXPR, stmt, 0); > + stmt = convert (innerptrtype, stmt); > + stmt = build_binary_op (loc, PLUS_EXPR, stmt, arg2, 1); > + stmt = build_indirect_ref (loc, stmt, RO_NULL); > + stmt = build2 (MODIFY_EXPR, TREE_TYPE (stmt), stmt, > + convert (TREE_TYPE (stmt), arg0)); > + stmt = build2 (COMPOUND_EXPR, arg1_type, stmt, decl); > + } > + return stmt; > + } > + > + for (n = 0; > + !VOID_TYPE_P (TREE_VALUE (fnargs)) && n < nargs; > + fnargs = TREE_CHAIN (fnargs), n++) > + { > + tree decl_type = TREE_VALUE (fnargs); > + 
tree arg = (*arglist)[n]; > + tree type; > + > + if (arg == error_mark_node) > + return error_mark_node; > + > + if (n >= MAX_OVLD_ARGS) > + abort (); > + > + arg = default_conversion (arg); > + > + /* The C++ front-end converts float * to const void * using > + NOP_EXPR<const void *> (NOP_EXPR<void *> (x)). */ > + type = TREE_TYPE (arg); > + if (POINTER_TYPE_P (type) > + && TREE_CODE (arg) == NOP_EXPR > + && lang_hooks.types_compatible_p (TREE_TYPE (arg), > + const_ptr_type_node) > + && lang_hooks.types_compatible_p (TREE_TYPE (TREE_OPERAND (arg, 0)), > + ptr_type_node)) > + { > + arg = TREE_OPERAND (arg, 0); > + type = TREE_TYPE (arg); > + } > + > + /* Remove the const from the pointers to simplify the overload > + matching further down. */ > + if (POINTER_TYPE_P (decl_type) > + && POINTER_TYPE_P (type) > + && TYPE_QUALS (TREE_TYPE (type)) != 0) > + { > + if (TYPE_READONLY (TREE_TYPE (type)) > + && !TYPE_READONLY (TREE_TYPE (decl_type))) > + warning (0, "passing argument %d of %qE discards qualifiers from " > + "pointer target type", n + 1, fndecl); > + type = build_pointer_type (build_qualified_type (TREE_TYPE (type), > + 0)); > + arg = fold_convert (type, arg); > + } > + > + /* For RS6000_OVLD_VEC_LXVL, convert any const * to its non constant > + equivalent to simplify the overload matching below. */ > + if (fcode == RS6000_OVLD_VEC_LXVL) > + { > + if (POINTER_TYPE_P (type) > + && TYPE_READONLY (TREE_TYPE (type))) > + { > + type = build_pointer_type (build_qualified_type ( > + TREE_TYPE (type),0)); > + arg = fold_convert (type, arg); > + } > + } > + > + args[n] = arg; > + types[n] = type; > + } > + > + /* If the number of arguments did not match the prototype, return NULL > + and the generic code will issue the appropriate error message. 
*/ > + if (!VOID_TYPE_P (TREE_VALUE (fnargs)) || n < nargs) > + return NULL; > + > + if (fcode == RS6000_OVLD_VEC_STEP) > + { > + if (TREE_CODE (types[0]) != VECTOR_TYPE) > + goto bad; > + > + return build_int_cst (NULL_TREE, TYPE_VECTOR_SUBPARTS (types[0])); > + } > + > + { > + bool unsupported_builtin = false; > + enum rs6000_gen_builtins overloaded_code; > + bool supported = false; > + ovlddata *instance = rs6000_overload_info[adj_fcode].first_instance; > + gcc_assert (instance != NULL); > + > + /* Need to special case __builtin_cmpb because the overloaded forms > + of this function take (unsigned int, unsigned int) or (unsigned > + long long int, unsigned long long int). Since C conventions > + allow the respective argument types to be implicitly coerced into > + each other, the default handling does not provide adequate > + discrimination between the desired forms of the function. */ > + if (fcode == RS6000_OVLD_SCAL_CMPB) > + { > + machine_mode arg1_mode = TYPE_MODE (types[0]); > + machine_mode arg2_mode = TYPE_MODE (types[1]); > + > + if (nargs != 2) > + { > + error ("builtin %qs only accepts 2 arguments", "__builtin_cmpb"); > + return error_mark_node; > + } > + > + /* If any supplied arguments are wider than 32 bits, resolve to > + 64-bit variant of built-in function. */ > + if ((GET_MODE_PRECISION (arg1_mode) > 32) > + || (GET_MODE_PRECISION (arg2_mode) > 32)) > + { > + /* Assure all argument and result types are compatible with > + the built-in function represented by RS6000_BIF_CMPB. */ > + overloaded_code = RS6000_BIF_CMPB; > + } > + else > + { > + /* Assure all argument and result types are compatible with > + the built-in function represented by RS6000_BIF_CMPB_32. 
*/ > + overloaded_code = RS6000_BIF_CMPB_32; > + } > + > + while (instance && instance->bifid != overloaded_code) > + instance = instance->next; > + > + gcc_assert (instance != NULL); > + tree fntype = rs6000_builtin_info_x[instance->bifid].fntype; > + tree parmtype0 = TREE_VALUE (TYPE_ARG_TYPES (fntype)); > + tree parmtype1 = TREE_VALUE (TREE_CHAIN (TYPE_ARG_TYPES (fntype))); > + > + if (rs6000_new_builtin_type_compatible (types[0], parmtype0) > + && rs6000_new_builtin_type_compatible (types[1], parmtype1)) > + { > + if (rs6000_builtin_decl (instance->bifid, false) != error_mark_node > + && rs6000_new_builtin_is_supported (instance->bifid)) > + { > + tree ret_type = TREE_TYPE (instance->fntype); > + return altivec_build_new_resolved_builtin (args, n, fntype, > + ret_type, > + instance->bifid, > + fcode); > + } > + else > + unsupported_builtin = true; > + } > + } > + else if (fcode == RS6000_OVLD_VEC_VSIE) OK, noting that this is foo_VEC_VSIEDP in the original code. (DP indicator dropped). > + { > + machine_mode arg1_mode = TYPE_MODE (types[0]); > + > + if (nargs != 2) > + { > + error ("builtin %qs only accepts 2 arguments", > + "scalar_insert_exp"); > + return error_mark_node; > + } > + > + /* If supplied first argument is wider than 64 bits, resolve to > + 128-bit variant of built-in function. */ > + if (GET_MODE_PRECISION (arg1_mode) > 64) > + { > + /* If first argument is of float variety, choose variant > + that expects __ieee128 argument. Otherwise, expect > + __int128 argument. */ Could use some "a" and/or "the" in the comment there. similar below. This matches comment in original code, so nbd. :-) > + if (GET_MODE_CLASS (arg1_mode) == MODE_FLOAT) > + overloaded_code = RS6000_BIF_VSIEQPF; > + else > + overloaded_code = RS6000_BIF_VSIEQP; > + } > + else > + { > + /* If first argument is of float variety, choose variant > + that expects double argument. Otherwise, expect > + long long int argument. 
*/ > + if (GET_MODE_CLASS (arg1_mode) == MODE_FLOAT) > + overloaded_code = RS6000_BIF_VSIEDPF; > + else > + overloaded_code = RS6000_BIF_VSIEDP; > + } > + > + while (instance && instance->bifid != overloaded_code) > + instance = instance->next; > + > + gcc_assert (instance != NULL); > + tree fntype = rs6000_builtin_info_x[instance->bifid].fntype; > + tree parmtype0 = TREE_VALUE (TYPE_ARG_TYPES (fntype)); > + tree parmtype1 = TREE_VALUE (TREE_CHAIN (TYPE_ARG_TYPES (fntype))); > + > + if (rs6000_new_builtin_type_compatible (types[0], parmtype0) > + && rs6000_new_builtin_type_compatible (types[1], parmtype1)) > + { > + if (rs6000_builtin_decl (instance->bifid, false) != error_mark_node > + && rs6000_new_builtin_is_supported (instance->bifid)) > + { > + tree ret_type = TREE_TYPE (instance->fntype); > + return altivec_build_new_resolved_builtin (args, n, fntype, > + ret_type, > + instance->bifid, > + fcode); > + } > + else > + unsupported_builtin = true; > + } > + } > + else > + { > + /* Functions with no arguments can have only one overloaded > + instance. 
*/ > + gcc_assert (n > 0 || !instance->next); > + > + for (; instance != NULL; instance = instance->next) > + { > + bool mismatch = false; > + tree nextparm = TYPE_ARG_TYPES (instance->fntype); > + > + for (unsigned int arg_i = 0; > + arg_i < nargs && nextparm != NULL; > + arg_i++) > + { > + tree parmtype = TREE_VALUE (nextparm); > + if (!rs6000_new_builtin_type_compatible (types[arg_i], > + parmtype)) > + { > + mismatch = true; > + break; > + } > + nextparm = TREE_CHAIN (nextparm); > + } > + > + if (mismatch) > + continue; > + > + supported = rs6000_new_builtin_is_supported (instance->bifid); > + if (rs6000_builtin_decl (instance->bifid, false) != error_mark_node > + && supported) > + { > + tree fntype = rs6000_builtin_info_x[instance->bifid].fntype; > + tree ret_type = TREE_TYPE (instance->fntype); > + return altivec_build_new_resolved_builtin (args, n, fntype, > + ret_type, > + instance->bifid, > + fcode); > + } > + else > + { > + unsupported_builtin = true; > + break; > + } > + } > + } > + > + if (unsupported_builtin) > + { > + const char *name = rs6000_overload_info[adj_fcode].ovld_name; > + if (!supported) > + { > + const char *internal_name > + = rs6000_builtin_info_x[instance->bifid].bifname; > + /* An error message making reference to the name of the > + non-overloaded function has already been issued. Add > + clarification of the previous message. */ > + rich_location richloc (line_table, input_location); > + inform (&richloc, "builtin %qs requires builtin %qs", > + name, internal_name); > + } > + else > + error ("%qs is not supported in this compiler configuration", name); > + /* If an error-representing result tree was returned from > + altivec_build_resolved_builtin above, use it. */ Extra space after error-representing. Also in original code. > + /* > + return (result != NULL) ? 
result : error_mark_node; > + */ > + return error_mark_node; > + } > + } > + bad: > + { > + const char *name = rs6000_overload_info[adj_fcode].ovld_name; > + error ("invalid parameter combination for AltiVec intrinsic %qs", name); > + return error_mark_node; > + } > +} ok > diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c > index e8625d17d18..2c68aa3580c 100644 > --- a/gcc/config/rs6000/rs6000-call.c > +++ b/gcc/config/rs6000/rs6000-call.c > @@ -12971,6 +12971,59 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator > *gsi) > return false; > } > > +/* Check whether a builtin function is supported in this target > + configuration. */ > +bool > +rs6000_new_builtin_is_supported (enum rs6000_gen_builtins fncode) > +{ > + switch (rs6000_builtin_info_x[(size_t) fncode].enable) > + { > + default: > + gcc_unreachable (); > + case ENB_ALWAYS: > + return true; > + case ENB_P5: > + return TARGET_POPCNTB; > + case ENB_P6: > + return TARGET_CMPB; > + case ENB_ALTIVEC: > + return TARGET_ALTIVEC; > + case ENB_CELL: > + return TARGET_ALTIVEC && rs6000_cpu == PROCESSOR_CELL; > + case ENB_VSX: > + return TARGET_VSX; > + case ENB_P7: > + return TARGET_POPCNTD; > + case ENB_P7_64: > + return TARGET_POPCNTD && TARGET_POWERPC64; > + case ENB_P8: > + return TARGET_DIRECT_MOVE; > + case ENB_P8V: > + return TARGET_P8_VECTOR; > + case ENB_P9: > + return TARGET_MODULO; > + case ENB_P9_64: > + return TARGET_MODULO && TARGET_POWERPC64; > + case ENB_P9V: > + return TARGET_P9_VECTOR; > + case ENB_IEEE128_HW: > + return TARGET_FLOAT128_HW; > + case ENB_DFP: > + return TARGET_DFP; > + case ENB_CRYPTO: > + return TARGET_CRYPTO; > + case ENB_HTM: > + return TARGET_HTM; > + case ENB_P10: > + return TARGET_POWER10; > + case ENB_P10_64: > + return TARGET_POWER10 && TARGET_POWERPC64; > + case ENB_MMA: > + return TARGET_MMA; > + } > + gcc_unreachable (); > +} ok > + > /* Expand an expression EXP that calls a built-in function, > with result going to TARGET if that's convenient 
> (and in mode MODE if that's convenient). > diff --git a/gcc/config/rs6000/rs6000-gen-builtins.c > b/gcc/config/rs6000/rs6000-gen-builtins.c > index f3d6156400a..f65932e1cd5 100644 > --- a/gcc/config/rs6000/rs6000-gen-builtins.c > +++ b/gcc/config/rs6000/rs6000-gen-builtins.c > @@ -2314,7 +2314,7 @@ write_decls (void) > > fprintf (header_file, "extern void rs6000_init_generated_builtins > ();\n\n"); > fprintf (header_file, > - "extern bool rs6000_new_builtin_is_supported_p " > + "extern bool rs6000_new_builtin_is_supported " > "(rs6000_gen_builtins);\n"); > fprintf (header_file, > "extern tree rs6000_builtin_decl (unsigned, " ok. Thanks -Will