Alfie Richards <[email protected]> writes:
> diff --git a/gcc/attribs.cc b/gcc/attribs.cc
> index b00d9529a8d..d0f37d77098 100644
> --- a/gcc/attribs.cc
> +++ b/gcc/attribs.cc
> [...]
> @@ -1287,6 +1282,33 @@ make_dispatcher_decl (const tree decl)
> DECL_EXTERNAL (func_decl) = 1;
> /* This will be of type IFUNCs have to be externally visible. */
> TREE_PUBLIC (func_decl) = 1;
> + TREE_NOTHROW (func_decl) = TREE_NOTHROW (decl);
> +
> + /* Set the decl name to avoid graph_node re-mangling it. */
> + SET_DECL_ASSEMBLER_NAME (func_decl, DECL_ASSEMBLER_NAME (decl));
> +
> + cgraph_node *node = cgraph_node::get (decl);
> + gcc_assert (node);
> + cgraph_function_version_info *node_v = node->function_version ();
> + gcc_assert (node_v);
Very minor suggestion, but: all callers already have the node to hand
and pass the decl inside it, so perhaps it would make sense to change
make_dispatcher_decl so that it takes the cgraph node instead.
> [...]
> @@ -19894,37 +19894,35 @@ static aarch64_fmv_feature_datum
> aarch64_fmv_feature_data[] = {
> the extension string is created and stored to INVALID_EXTENSION. */
>
> static enum aarch_parse_opt_result
> -aarch64_parse_fmv_features (const char *str, aarch64_feature_flags
> *isa_flags,
> +aarch64_parse_fmv_features (string_slice str, aarch64_feature_flags
> *isa_flags,
> aarch64_fmv_feature_mask *feature_mask,
> std::string *invalid_extension)
> {
> if (feature_mask)
> *feature_mask = 0ULL;
>
> - if (strcmp (str, "default") == 0)
> + if (str == "default")
> return AARCH_PARSE_OK;
>
> - while (str != NULL && *str != 0)
> + string_slice str_parse = str;
> +
> + gcc_assert (str.is_valid ());
> + while (str_parse.is_valid ())
> {
> - const char *ext;
> - size_t len;
> + string_slice ext;
>
> - ext = strchr (str, '+');
> + ext = string_slice::tokenize (&str_parse, string_slice ("+"));
Following on from the comment about explicit constructors, it'd be
nice not to need the explicit constructor here.
> - if (ext != NULL)
> - len = ext - str;
> - else
> - len = strlen (str);
> + gcc_assert (ext.is_valid ());
>
> - if (len == 0)
> + if (!ext.is_valid () || ext.empty ())
The assert makes the !ext.is_valid () part redundant.
> return AARCH_PARSE_MISSING_ARG;
>
> int num_features = ARRAY_SIZE (aarch64_fmv_feature_data);
> int i;
> for (i = 0; i < num_features; i++)
> {
> - if (strlen (aarch64_fmv_feature_data[i].name) == len
> - && strncmp (aarch64_fmv_feature_data[i].name, str, len) == 0)
> + if (aarch64_fmv_feature_data[i].name == ext)
> {
> if (isa_flags)
> *isa_flags |= aarch64_fmv_feature_data[i].opt_flags;
> [...]
> @@ -19992,7 +19987,7 @@ aarch64_process_target_version_attr (tree args)
> return false;
> }
>
> - const char *str = TREE_STRING_POINTER (args);
> + string_slice str = string_slice (TREE_STRING_POINTER (args));
Similarly here, I'd hope:
string_slice str = TREE_STRING_POINTER (args);
would be enough.
>
> enum aarch_parse_opt_result parse_res;
> auto isa_flags = aarch64_asm_isa_flags;
> @@ -20195,36 +20191,33 @@ tree
> aarch64_mangle_decl_assembler_name (tree decl, tree id)
> {
> /* For function version, add the target suffix to the assembler name. */
> - if (TREE_CODE (decl) == FUNCTION_DECL
> - && DECL_FUNCTION_VERSIONED (decl))
> + if (TREE_CODE (decl) == FUNCTION_DECL)
> {
> - aarch64_fmv_feature_mask feature_mask = get_feature_mask_for_version
> (decl);
> -
> - std::string name = IDENTIFIER_POINTER (id);
> -
> - /* For the default version, append ".default". */
> - if (feature_mask == 0ULL)
> + cgraph_node *node = cgraph_node::get (decl);
> + if (node && node->dispatcher_function)
> + return id;
> + else if (node && node->dispatcher_resolver_function)
> + return clone_identifier (id, "resolver");
> + else if (DECL_FUNCTION_VERSIONED (decl))
> {
> - name += ".default";
> - return get_identifier (name.c_str());
> - }
> + aarch64_fmv_feature_mask feature_mask
> + = get_feature_mask_for_version (decl);
>
> - name += "._";
> + if (feature_mask == 0ULL)
> + return clone_identifier (id, "default");
>
> - int num_features = ARRAY_SIZE (aarch64_fmv_feature_data);
> - for (int i = 0; i < num_features; i++)
> - {
> - if (feature_mask & aarch64_fmv_feature_data[i].feature_mask)
> - {
> - name += "M";
> - name += aarch64_fmv_feature_data[i].name;
> - }
> - }
> + std::string suffix = "_";
>
> - if (DECL_ASSEMBLER_NAME_SET_P (decl))
> - SET_DECL_RTL (decl, NULL);
> + int num_features = ARRAY_SIZE (aarch64_fmv_feature_data);
> + for (int i = 0; i < num_features; i++)
> + if (feature_mask & aarch64_fmv_feature_data[i].feature_mask)
> + {
> + suffix += "M";
> + suffix += aarch64_fmv_feature_data[i].name;
> + }
>
> - id = get_identifier (name.c_str());
> + id = clone_identifier (id, suffix.c_str ());
> + }
> }
> return id;
> }
Thanks, the new flow makes a lot more intuitive sense.
> @@ -20233,18 +20226,6 @@ aarch64_mangle_decl_assembler_name (tree decl, tree
> id)
> This is computed by taking the default version's assembler name, and
> stripping off the ".default" suffix if it's already been appended. */
The comment above the function should go as well.
>
> -static tree
> -get_suffixed_assembler_name (tree default_decl, const char *suffix)
> -{
> - std::string name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (default_decl));
> -
> - auto size = name.size ();
> - if (size >= 8 && name.compare (size - 8, 8, ".default") == 0)
> - name.resize (size - 8);
> - name += suffix;
> - return get_identifier (name.c_str());
> -}
> -
> /* Make the resolver function decl to dispatch the versions of
> a multi-versioned function, DEFAULT_DECL. IFUNC_ALIAS_DECL is
> ifunc alias that will point to the created resolver. Create an
> [...]
> @@ -20270,10 +20249,21 @@ make_resolver_func (const tree default_decl,
> build_ifunc_arg_type (),
> NULL_TREE);
>
> - decl = build_fn_decl (resolver_name, type);
> - SET_DECL_ASSEMBLER_NAME (decl, decl_name);
> + cgraph_node *node = cgraph_node::get (default_decl);
Similarly to the comment above, the caller already has this node to hand,
so it might be better to pass that instead of the decl.
> + gcc_assert (node && node->function_version ());
> +
> + decl = build_fn_decl (IDENTIFIER_POINTER (DECL_NAME (default_decl)), type);
> +
> + /* Set the assembler name to prevent cgraph_node attempting to mangle. */
> + SET_DECL_ASSEMBLER_NAME (decl, DECL_ASSEMBLER_NAME (default_decl));
> +
> + cgraph_node *resolver_node = cgraph_node::get_create (decl);
> + resolver_node->dispatcher_resolver_function = true;
> +
> + tree id = aarch64_mangle_decl_assembler_name
> + (decl, node->function_version ()->assembler_name);
> + symtab->change_decl_assembler_name (decl, id);
>
> - DECL_NAME (decl) = decl_name;
> TREE_USED (decl) = 1;
> DECL_ARTIFICIAL (decl) = 1;
> DECL_IGNORED_P (decl) = 1;
> @@ -20338,7 +20328,7 @@ make_resolver_func (const tree default_decl,
> gcc_assert (ifunc_alias_decl != NULL);
> /* Mark ifunc_alias_decl as "ifunc" with resolver as resolver_name. */
> DECL_ATTRIBUTES (ifunc_alias_decl)
> - = make_attribute ("ifunc", resolver_name,
> + = make_attribute ("ifunc", IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME
> (decl)),
> DECL_ATTRIBUTES (ifunc_alias_decl));
>
> /* Create the alias for dispatch to resolver here. */
Sorry to create more work :), but how much of make_resolver_func is
shared between AArch64 and RISC-V? If the two functions are essentially
identical, minus using things like aarch64_mangle_decl_assembler_name
directly instead of the hook, then it might be worth moving this into
target-independent code.
To be clear, this is for target_version targets only. It's fine
(IMO) if x86 and powerpc need ad hoc variants that stay inside the
target code.
> [...]
> diff --git a/gcc/multiple_target.cc b/gcc/multiple_target.cc
> index d8becf4d9a9..cd7570a9aad 100644
> --- a/gcc/multiple_target.cc
> +++ b/gcc/multiple_target.cc
> [...]
> @@ -317,11 +219,11 @@ expand_target_clones (struct cgraph_node *node, bool
> definition)
> if (!attr_target)
> return false;
>
> - tree arglist = TREE_VALUE (attr_target);
> - int attr_len = get_target_clone_attr_len (arglist);
> + int num_def = 0;
> + auto_vec<string_slice> attr_list = get_clone_versions (node->decl,
> &num_def);
>
> /* No need to clone for 1 target attribute. */
> - if (attr_len == -1)
> + if (attr_list.length () == 1)
> {
> warning_at (DECL_SOURCE_LOCATION (node->decl),
> 0, "single %<target_clones%> attribute is ignored");
Sorry for only bringing this up now, but I wonder if we should bump
this to an error for target_version targets. It seems odd that we
make no attempt to validate the string further.
> @@ -348,67 +250,71 @@ expand_target_clones (struct cgraph_node *node, bool
> definition)
> return false;
> }
>
> - char *attr_str = XNEWVEC (char, attr_len);
> - int attrnum = get_attr_str (arglist, attr_str);
> - char **attrs = XNEWVEC (char *, attrnum);
> -
> - attrnum = separate_attrs (attr_str, attrs, attrnum);
> - switch (attrnum)
> + /* Disallow multiple defaults. */
> + if (num_def > 1)
> {
> - case -1:
> - error_at (DECL_SOURCE_LOCATION (node->decl),
> - "%<default%> target was not set");
> - break;
> - case -2:
> - error_at (DECL_SOURCE_LOCATION (node->decl),
> - "an empty string cannot be in %<target_clones%> attribute");
> - break;
> - case -3:
> error_at (DECL_SOURCE_LOCATION (node->decl),
> "multiple %<default%> targets were set");
> - break;
> - default:
> - break;
> + return false;
> }
> -
> - if (attrnum < 0)
> + /* Disallow target clones with no defaults. */
> + if (num_def == 0)
> {
> - XDELETEVEC (attrs);
> - XDELETEVEC (attr_str);
> + error_at (DECL_SOURCE_LOCATION (node->decl),
> + "%<default%> target was not set");
> return false;
> }
>
> - const char *new_attr_name = (TARGET_HAS_FMV_TARGET_ATTRIBUTE
> - ? "target" : "target_version");
> + /* Disallow any empty values in the clone attr. */
> + for (string_slice attr : attr_list)
> + if (attr == string_slice ("") || !attr.is_valid ())
> + {
> + error_at (DECL_SOURCE_LOCATION (node->decl),
> + "an empty string cannot be in %<target_clones%> attribute");
> + return false;
> + }
> +
> + string_slice new_attr_name = string_slice
> + (TARGET_HAS_FMV_TARGET_ATTRIBUTE ? "target" : "target_version");
> cgraph_function_version_info *decl1_v = NULL;
> cgraph_function_version_info *decl2_v = NULL;
> cgraph_function_version_info *before = NULL;
> cgraph_function_version_info *after = NULL;
> decl1_v = node->function_version ();
> - if (decl1_v == NULL)
> + if (!decl1_v)
> decl1_v = node->insert_new_function_version ();
> - before = decl1_v;
> +
> + node->is_target_clone = true;
> DECL_FUNCTION_VERSIONED (node->decl) = 1;
>
> - for (i = 0; i < attrnum; i++)
> + before = decl1_v;
> +
> + /* The existing decl is turned into one of the target versions.
> + If there is a default in the list then this decl is used for that
> version
> + as the calls are already to it so in the case where there is no
> + implementation and !TARGET_HAS_FMV_TARGET_ATTRIBUTE there is no
> redirection
> + necessary.
> + Otherwise, the first version listed in the attribute is used. */
Looks like it uses the last version listed, rather than the first.
> + string_slice this_node_version
> + = num_def ? string_slice ("default") : attr_list.pop ();
> +
> + for (string_slice attr : attr_list)
> {
> - char *attr = attrs[i];
> + /* Skip default nodes. */
> + if (attr == string_slice ("default"))
Would be good to avoid the explicit constructor.
> + continue;
>
> /* Create new target clone. */
> tree attributes = make_attribute (new_attr_name, attr,
> DECL_ATTRIBUTES (node->decl));
>
> - char *suffix = XNEWVEC (char, strlen (attr) + 1);
> - create_new_asm_name (attr, suffix);
> - cgraph_node *new_node = create_target_clone (node, definition, suffix,
> - attributes);
> - XDELETEVEC (suffix);
> + /* Remove the target_clones attribute, as this can confuse
> + is_function_default_version. */
> + remove_attribute ("target_clones", attributes);
The make_attribute call above reuses the old DECL_ATTRIBUTES, rather
than copying it, so isn't the effect of this to remove the target_clones
attribute from node->decl as well, if the attribute isn't the first one
in node->decl? If that's ok (it might be), then perhaps we should do
this once, before the loop.
Otherwise the attribs.cc, multiple-target.cc, tree.* and aarch64 bits
look really good. I'm not qualified to review the rest.
Thanks,
Richard