https://gcc.gnu.org/g:0337e3c2743ca0c57da8c6b78b725a7d83f0b721
commit r16-1738-g0337e3c2743ca0c57da8c6b78b725a7d83f0b721 Author: Nathaniel Shead <nathanielosh...@gmail.com> Date: Wed May 21 01:18:49 2025 +1000 c++/modules: Avoid name clashes when streaming internal labels [PR98375,PR118904] The frontend creates some variables that need to be given unique names for the TU so that they can unambiguously be accessed. Historically this has been done with a global counter local to each place that needs an internal label, but this doesn't work with modules as depending on what declarations have been imported, some counter values may have already been used. This patch reworks the situation to instead have a single collection of counters for the TU, and a new function 'generate_internal_label' that gets the next label with given prefix using that counter. Modules streaming can then use this function to regenerate new names on stream-in for any such decls, guaranteeing uniqueness within the TU. These labels should only be used for internal entities so there should be no issues with the names differing from TU to TU; we will need to handle this if we ever start checking ODR of definitions we're merging but that's an issue for later. For proof of concept, this patch makes use of the new API for __builtin_source_location and ubsan; there are probably other places in the frontend where this change will need to be made as well. One other change this exposes is that both of these components rely on the definition of the VAR_DECLs they create, so stream that too for uncontexted variables. PR c++/98735 PR c++/118904 gcc/cp/ChangeLog: * cp-gimplify.cc (source_location_id): Remove. (fold_builtin_source_location): Use generate_internal_label. * module.cc (enum tree_tag): Add 'tt_internal_id' enumerator. (trees_out::tree_value): Adjust assertion, write definitions of uncontexted VAR_DECLs. (trees_in::tree_value): Read variable definitions. (trees_out::tree_node): Write internal labels, adjust assert. (trees_in::tree_node): Read internal labels. gcc/ChangeLog: * tree.cc (struct identifier_hash): New type. (struct identifier_count_traits): New traits. (internal_label_nums): New hash map. (generate_internal_label): New function. (prefix_for_internal_label): New function. * tree.h (IDENTIFIER_INTERNAL_P): New macro. (generate_internal_label): Declare. (prefix_for_internal_label): Declare. * ubsan.cc (ubsan_ids): Remove. (ubsan_type_descriptor): Use generate_internal_label. (ubsan_create_data): Likewise. gcc/testsuite/ChangeLog: * g++.dg/modules/src-loc-1.h: New test. * g++.dg/modules/src-loc-1_a.H: New test. * g++.dg/modules/src-loc-1_b.C: New test. * g++.dg/modules/src-loc-1_c.C: New test. * g++.dg/modules/ubsan-1_a.C: New test. * g++.dg/modules/ubsan-1_b.C: New test. * g++.dg/ubsan/module-1-aux.cc: New test. * g++.dg/ubsan/module-1.C: New test. Signed-off-by: Nathaniel Shead <nathanielosh...@gmail.com> Reviewed-by: Jason Merrill <ja...@redhat.com> Diff: --- gcc/cp/cp-gimplify.cc | 5 +-- gcc/cp/module.cc | 51 ++++++++++++++++++++++++++---- gcc/testsuite/g++.dg/modules/src-loc-1.h | 6 ++++ gcc/testsuite/g++.dg/modules/src-loc-1_a.H | 7 ++++ gcc/testsuite/g++.dg/modules/src-loc-1_b.C | 5 +++ gcc/testsuite/g++.dg/modules/src-loc-1_c.C | 16 ++++++++++ gcc/testsuite/g++.dg/modules/ubsan-1_a.C | 10 ++++++ gcc/testsuite/g++.dg/modules/ubsan-1_b.C | 14 ++++++++ gcc/testsuite/g++.dg/ubsan/module-1-aux.cc | 12 +++++++ gcc/testsuite/g++.dg/ubsan/module-1.C | 11 +++++++ gcc/tree.cc | 51 ++++++++++++++++++++++++++++++ gcc/tree.h | 7 ++++ gcc/ubsan.cc | 16 +++------- 13 files changed, 188 insertions(+), 23 deletions(-) diff --git a/gcc/cp/cp-gimplify.cc b/gcc/cp/cp-gimplify.cc index 0fcfa16d2c59..ce69bd6030c7 100644 --- a/gcc/cp/cp-gimplify.cc +++ b/gcc/cp/cp-gimplify.cc @@ -3887,7 +3887,6 @@ struct source_location_table_entry_hash static GTY(()) hash_table <source_location_table_entry_hash> *source_location_table; -static GTY(()) unsigned int source_location_id; /* Fold the __builtin_source_location () call T. */ @@ -3920,9 +3919,7 @@ fold_builtin_source_location (const_tree t) var = entryp->var; else { - char tmp_name[32]; - ASM_GENERATE_INTERNAL_LABEL (tmp_name, "Lsrc_loc", source_location_id++); - var = build_decl (loc, VAR_DECL, get_identifier (tmp_name), + var = build_decl (loc, VAR_DECL, generate_internal_label ("Lsrc_loc"), source_location_impl); TREE_STATIC (var) = 1; TREE_PUBLIC (var) = 0; diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc index e5fb1c36e9d2..42a1b83e1643 100644 --- a/gcc/cp/module.cc +++ b/gcc/cp/module.cc @@ -2811,12 +2811,13 @@ enum tree_tag { tt_decl, /* By-value mergeable decl. */ tt_tpl_parm, /* Template parm. */ - /* The ordering of the following 4 is relied upon in + /* The ordering of the following 5 is relied upon in trees_out::tree_node. */ tt_id, /* Identifier node. */ tt_conv_id, /* Conversion operator name. */ tt_anon_id, /* Anonymous name. */ tt_lambda_id, /* Lambda name. */ + tt_internal_id, /* Internal name. */ tt_typedef_type, /* A (possibly implicit) typedefed type. */ tt_derived_type, /* A type derived from another type. */ @@ -9666,7 +9667,9 @@ trees_out::tree_value (tree t) && (TREE_CODE (t) != TYPE_DECL || (DECL_ARTIFICIAL (t) && !DECL_CONTEXT (t))) && (TREE_CODE (t) != VAR_DECL - || (!DECL_NAME (t) && !DECL_CONTEXT (t))) + || ((!DECL_NAME (t) + || IDENTIFIER_INTERNAL_P (DECL_NAME (t))) + && !DECL_CONTEXT (t))) && TREE_CODE (t) != FUNCTION_DECL)); if (streaming_p ()) @@ -9730,6 +9733,14 @@ trees_out::tree_value (tree t) && dump ("Written type:%d %C:%N", type_tag, TREE_CODE (type), type); } + /* For uncontexted VAR_DECLs we need to stream the definition so that + importers can recreate their value. */ + if (TREE_CODE (t) == VAR_DECL) + { + gcc_checking_assert (!DECL_NONTRIVIALLY_INITIALIZED_P (t)); + tree_node (DECL_INITIAL (t)); + } + if (streaming_p ()) dump (dumper::TREE) && dump ("Written tree:%d %C:%N", tag, TREE_CODE (t), t); } @@ -9810,6 +9821,13 @@ bail: && dump ("Read type:%d %C:%N", type_tag, TREE_CODE (type), type); } + if (TREE_CODE (t) == VAR_DECL) + { + DECL_INITIAL (t) = tree_node (); + if (TREE_STATIC (t)) + varpool_node::finalize_decl (t); + } + if (TREE_CODE (t) == LAMBDA_EXPR && CLASSTYPE_LAMBDA_EXPR (TREE_TYPE (t))) { @@ -9952,10 +9970,13 @@ trees_out::tree_node (tree t) if (TREE_CODE (t) == IDENTIFIER_NODE) { - /* An identifier node -> tt_id, tt_conv_id, tt_anon_id, tt_lambda_id. */ + /* An identifier node -> tt_id, tt_conv_id, tt_anon_id, tt_lambda_id, + tt_internal_id. */ int code = tt_id; if (IDENTIFIER_ANON_P (t)) code = IDENTIFIER_LAMBDA_P (t) ? tt_lambda_id : tt_anon_id; + else if (IDENTIFIER_INTERNAL_P (t)) + code = tt_internal_id; else if (IDENTIFIER_CONV_OP_P (t)) code = tt_conv_id; @@ -9970,13 +9991,15 @@ trees_out::tree_node (tree t) } else if (code == tt_id && streaming_p ()) str (IDENTIFIER_POINTER (t), IDENTIFIER_LENGTH (t)); + else if (code == tt_internal_id && streaming_p ()) + str (prefix_for_internal_label (t)); int tag = insert (t); if (streaming_p ()) { - /* We know the ordering of the 4 id tags. */ + /* We know the ordering of the 5 id tags. */ static const char *const kinds[] = - {"", "conv_op ", "anon ", "lambda "}; + {"", "conv_op ", "anon ", "lambda ", "internal "}; dump (dumper::TREE) && dump ("Written:%d %sidentifier:%N", tag, kinds[code - tt_id], @@ -10053,8 +10076,11 @@ trees_out::tree_node (tree t) break; case VAR_DECL: - /* AGGR_INIT_EXPRs cons up anonymous uncontexted VAR_DECLs. */ - gcc_checking_assert (!DECL_NAME (t) + /* AGGR_INIT_EXPRs cons up anonymous uncontexted VAR_DECLs, + and internal vars are created by sanitizers and + __builtin_source_location. */ + gcc_checking_assert ((!DECL_NAME (t) + || IDENTIFIER_INTERNAL_P (DECL_NAME (t))) && DECL_ARTIFICIAL (t)); break; @@ -10211,6 +10237,17 @@ trees_in::tree_node (bool is_use) } break; + case tt_internal_id: + /* An internal label. */ + { + const char *prefix = str (); + res = generate_internal_label (prefix); + int tag = insert (res); + dump (dumper::TREE) + && dump ("Read internal identifier:%d %N", tag, res); + } + break; + case tt_typedef_type: res = tree_node (); if (res) diff --git a/gcc/testsuite/g++.dg/modules/src-loc-1.h b/gcc/testsuite/g++.dg/modules/src-loc-1.h new file mode 100644 index 000000000000..90c2811fe8be --- /dev/null +++ b/gcc/testsuite/g++.dg/modules/src-loc-1.h @@ -0,0 +1,6 @@ +// PR c++/118904 + +#include <source_location> +inline std::source_location foo() { + return std::source_location::current(); +} diff --git a/gcc/testsuite/g++.dg/modules/src-loc-1_a.H b/gcc/testsuite/g++.dg/modules/src-loc-1_a.H new file mode 100644 index 000000000000..21c92ed4f83c --- /dev/null +++ b/gcc/testsuite/g++.dg/modules/src-loc-1_a.H @@ -0,0 +1,7 @@ +// PR c++/118904 +// { dg-additional-options "-fmodule-header -std=c++20 -fdump-lang-module-uid" } +// { dg-module-cmi {} } + +#include "src-loc-1.h" + +// { dg-final { scan-lang-dump {Written:-[0-9]* internal identifier:[^\n\r]*Lsrc_loc} module } } diff --git a/gcc/testsuite/g++.dg/modules/src-loc-1_b.C b/gcc/testsuite/g++.dg/modules/src-loc-1_b.C new file mode 100644 index 000000000000..44881a2270cb --- /dev/null +++ b/gcc/testsuite/g++.dg/modules/src-loc-1_b.C @@ -0,0 +1,5 @@ +// PR c++/118904 +// { dg-additional-options "-fmodules -std=c++20 -fno-module-lazy" } + +#include "src-loc-1.h" +import "src-loc-1_a.H"; diff --git a/gcc/testsuite/g++.dg/modules/src-loc-1_c.C b/gcc/testsuite/g++.dg/modules/src-loc-1_c.C new file mode 100644 index 000000000000..6b62fd965241 --- /dev/null +++ b/gcc/testsuite/g++.dg/modules/src-loc-1_c.C @@ -0,0 +1,16 @@ +// PR c++/118904 +// { dg-module-do run } +// { dg-additional-options "-fmodules -std=c++20 -fdump-lang-module-uid" } + +import "src-loc-1_a.H"; + +int main() { + const char* a = foo().function_name(); + const char* b = std::source_location::current().function_name(); + if (__builtin_strcmp(a, "std::source_location foo()")) + __builtin_abort(); + if (__builtin_strcmp(b, "int main()")) + __builtin_abort(); +} + +// { dg-final { scan-lang-dump {Read internal identifier:-[0-9]* [^\n\r]*Lsrc_loc} module } } diff --git a/gcc/testsuite/g++.dg/modules/ubsan-1_a.C b/gcc/testsuite/g++.dg/modules/ubsan-1_a.C new file mode 100644 index 000000000000..583d20118baf --- /dev/null +++ b/gcc/testsuite/g++.dg/modules/ubsan-1_a.C @@ -0,0 +1,10 @@ +// PR c++/98735 +// { dg-additional-options "-fmodules -fsanitize=undefined -Wno-return-type" } +// { dg-module-cmi X } + +export module X; + +export inline int f(int x) { + if (x > 0) + return x * 5; +} diff --git a/gcc/testsuite/g++.dg/modules/ubsan-1_b.C b/gcc/testsuite/g++.dg/modules/ubsan-1_b.C new file mode 100644 index 000000000000..ae715198f6e6 --- /dev/null +++ b/gcc/testsuite/g++.dg/modules/ubsan-1_b.C @@ -0,0 +1,14 @@ +// PR c++/98735 +// { dg-additional-options "-fmodules -fsanitize=undefined -Wno-return-type" } +// Note: can't work out how to do a link test here. + +int g(int x) { + if (x > 0) + return x - 5; +} + +import X; + +int main() { + f(123); +} diff --git a/gcc/testsuite/g++.dg/ubsan/module-1-aux.cc b/gcc/testsuite/g++.dg/ubsan/module-1-aux.cc new file mode 100644 index 000000000000..7930896140f1 --- /dev/null +++ b/gcc/testsuite/g++.dg/ubsan/module-1-aux.cc @@ -0,0 +1,12 @@ +// PR c++/98735 + +int g(int x) { + if (x > 0) + return x - 5; +} + +import X; + +int main() { + f(123); +} diff --git a/gcc/testsuite/g++.dg/ubsan/module-1.C b/gcc/testsuite/g++.dg/ubsan/module-1.C new file mode 100644 index 000000000000..566113da514e --- /dev/null +++ b/gcc/testsuite/g++.dg/ubsan/module-1.C @@ -0,0 +1,11 @@ +// PR c++/98735 +// { dg-do run { target c++17 } } +// { dg-options "-fmodules -fsanitize=undefined -Wno-return-type" } +// { dg-additional-sources module-1-aux.cc } + +export module X; + +export inline int f(int x) { + if (x > 0) + return x * 5; +} diff --git a/gcc/tree.cc b/gcc/tree.cc index c8b8b3edd35a..e9a83e4260b3 100644 --- a/gcc/tree.cc +++ b/gcc/tree.cc @@ -788,6 +788,57 @@ init_ttree (void) } +/* Mapping from prefix to label number. */ + +struct identifier_hash : ggc_ptr_hash <tree_node> +{ + static inline hashval_t hash (tree t) + { + return IDENTIFIER_HASH_VALUE (t); + } +}; +struct identifier_count_traits + : simple_hashmap_traits<identifier_hash, long> {}; +typedef hash_map<tree, long, identifier_count_traits> internal_label_map; +static GTY(()) internal_label_map *internal_label_nums; + +/* Generates an identifier intended to be used internally with the + given PREFIX. This is intended to be used by the frontend so that + C++ modules can regenerate appropriate (non-clashing) identifiers on + stream-in. */ + +tree +generate_internal_label (const char *prefix) +{ + tree prefix_id = get_identifier (prefix); + if (!internal_label_nums) + internal_label_nums = internal_label_map::create_ggc(); + long &num = internal_label_nums->get_or_insert (prefix_id); + + char tmp[32]; + ASM_GENERATE_INTERNAL_LABEL (tmp, prefix, num++); + + tree id = get_identifier (tmp); + IDENTIFIER_INTERNAL_P (id) = true; + + /* Cache the prefix on the identifier so we can retrieve it later. */ + TREE_CHAIN (id) = prefix_id; + + return id; +} + +/* Get the PREFIX we created the internal identifier LABEL with. */ + +const char * +prefix_for_internal_label (tree label) +{ + gcc_assert (IDENTIFIER_INTERNAL_P (label) + && !IDENTIFIER_TRANSPARENT_ALIAS (label) + && TREE_CHAIN (label) + && TREE_CODE (TREE_CHAIN (label)) == IDENTIFIER_NODE); + return IDENTIFIER_POINTER (TREE_CHAIN (label)); +} + /* The name of the object as the assembler will see it (but before any translations made by ASM_OUTPUT_LABELREF). Often this is the same as DECL_NAME. It is an IDENTIFIER_NODE. */ diff --git a/gcc/tree.h b/gcc/tree.h index c0ecf30d19bb..5de8d7e1ca17 100644 --- a/gcc/tree.h +++ b/gcc/tree.h @@ -1078,6 +1078,11 @@ extern void omp_clause_range_check_failed (const_tree, const char *, int, #define IDENTIFIER_ANON_P(NODE) \ (IDENTIFIER_NODE_CHECK (NODE)->base.private_flag) +/* Nonzero indicates an IDENTIFIER_NODE that names an internal label. + The prefix used to generate the label can be found on the TREE_CHAIN. */ +#define IDENTIFIER_INTERNAL_P(NODE) \ + (IDENTIFIER_NODE_CHECK (NODE)->base.volatile_flag) + /* Nonzero in an IDENTIFIER_NODE if the name is a local alias, whose uses are to be substituted for uses of the TREE_CHAINed identifier. */ #define IDENTIFIER_TRANSPARENT_ALIAS(NODE) \ @@ -4744,6 +4749,8 @@ vector_cst_encoded_nelts (const_tree t) return VECTOR_CST_NPATTERNS (t) * VECTOR_CST_NELTS_PER_PATTERN (t); } +extern tree generate_internal_label (const char *); +extern const char *prefix_for_internal_label (tree label); extern tree decl_assembler_name (tree); extern void overwrite_decl_assembler_name (tree decl, tree name); extern tree decl_comdat_group (const_tree); diff --git a/gcc/ubsan.cc b/gcc/ubsan.cc index 35a7dbd011ee..3c130a660951 100644 --- a/gcc/ubsan.cc +++ b/gcc/ubsan.cc @@ -358,10 +358,6 @@ get_ubsan_type_info_for_type (tree type) return 0; } -/* Counters for internal labels. ubsan_ids[0] for Lubsan_type, - ubsan_ids[1] for Lubsan_data labels. */ -static GTY(()) unsigned int ubsan_ids[2]; - /* Helper routine that returns ADDR_EXPR of a VAR_DECL of a type descriptor. It first looks into the hash table; if not found, create the VAR_DECL, put it into the hash table and return the @@ -552,10 +548,8 @@ ubsan_type_descriptor (tree type, enum ubsan_print_style pstyle) TREE_READONLY (str) = 1; TREE_STATIC (str) = 1; - char tmp_name[32]; - ASM_GENERATE_INTERNAL_LABEL (tmp_name, "Lubsan_type", ubsan_ids[0]++); - decl = build_decl (UNKNOWN_LOCATION, VAR_DECL, get_identifier (tmp_name), - dtype); + decl = build_decl (UNKNOWN_LOCATION, VAR_DECL, + generate_internal_label ("Lubsan_type"), dtype); TREE_STATIC (decl) = 1; TREE_PUBLIC (decl) = 0; DECL_ARTIFICIAL (decl) = 1; @@ -659,10 +653,8 @@ ubsan_create_data (const char *name, int loccnt, const location_t *ploc, ...) layout_type (ret); /* Now, fill in the type. */ - char tmp_name[32]; - ASM_GENERATE_INTERNAL_LABEL (tmp_name, "Lubsan_data", ubsan_ids[1]++); - tree var = build_decl (UNKNOWN_LOCATION, VAR_DECL, get_identifier (tmp_name), - ret); + tree var = build_decl (UNKNOWN_LOCATION, VAR_DECL, + generate_internal_label ("Lubsan_data"), ret); TREE_STATIC (var) = 1; TREE_PUBLIC (var) = 0; DECL_ARTIFICIAL (var) = 1;