Author: Chuanqi Xu Date: 2024-06-25T15:04:32+08:00 New Revision: fa20184a8f336e4154f2ffeeeb8a538dc9462d9a
URL: https://github.com/llvm/llvm-project/commit/fa20184a8f336e4154f2ffeeeb8a538dc9462d9a DIFF: https://github.com/llvm/llvm-project/commit/fa20184a8f336e4154f2ffeeeb8a538dc9462d9a.diff LOG: [C++20] [Modules] [Serialization] Don't reuse type ID and identifier ID from imported modules To support no-transitive-change model for named modules, we can't reuse type ID and identifier ID from imported modules arbitrarily. Since the theory for no-transitive-change model is, for a user of a named module, the user can only access the indirectly imported decls via the directly imported module. So that it is possible to control what matters to the users when writing the module. And it will be unsafe to do so if the users can reuse the type IDs and identifier IDs from the indirectly imported modules not via the directly imported modules. So in this patch, we don't reuse the type ID and identifier ID in the AST writer to avoid the problematic case. Added: clang/test/Modules/no-external-identifier-id.cppm clang/test/Modules/no-external-type-id.cppm Modified: clang/lib/Serialization/ASTWriter.cpp Removed: ################################################################################ diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 5b39055cf9f27..9c55960b14cba 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -5355,6 +5355,20 @@ ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot, writeUnhashedControlBlock(PP, Context); + // Don't reuse type ID and Identifier ID from readers for C++ standard named + // modules since we want to support no-transitive-change model for named + // modules. The theory for no-transitive-change model is, + // for a user of a named module, the user can only access the indirectly + // imported decls via the directly imported module. So that it is possible to + // control what matters to the users when writing the module. It would be + // problematic if the users can reuse the type IDs and identifier IDs from + // indirectly imported modules arbitrarily. So we choose to clear these ID + // here. + if (isWritingStdCXXNamedModules()) { + TypeIdxs.clear(); + IdentifierIDs.clear(); + } + // Look for any identifiers that were named while processing the // headers, but are otherwise not needed. We add these to the hash // table to enable checking of the predefines buffer in the case @@ -6686,6 +6700,11 @@ void ASTWriter::ReaderInitialized(ASTReader *Reader) { } void ASTWriter::IdentifierRead(IdentifierID ID, IdentifierInfo *II) { + // Don't reuse Type ID from external modules for named modules. See the + // comments in WriteASTCore for details. + if (isWritingStdCXXNamedModules()) + return; + IdentifierID &StoredID = IdentifierIDs[II]; unsigned OriginalModuleFileIndex = StoredID >> 32; @@ -6708,6 +6727,11 @@ void ASTWriter::MacroRead(serialization::MacroID ID, MacroInfo *MI) { } void ASTWriter::TypeRead(TypeIdx Idx, QualType T) { + // Don't reuse Type ID from external modules for named modules. See the + // comments in WriteASTCore for details. + if (isWritingStdCXXNamedModules()) + return; + // Always take the type index that comes in later module files. // This copes with an interesting // case for chained AST writing where we schedule writing the type and then, diff --git a/clang/test/Modules/no-external-identifier-id.cppm b/clang/test/Modules/no-external-identifier-id.cppm new file mode 100644 index 0000000000000..25825ef67ad91 --- /dev/null +++ b/clang/test/Modules/no-external-identifier-id.cppm @@ -0,0 +1,37 @@ +// Testing that we won't record the identifier ID from external modules. +// +// RUN: rm -rf %t +// RUN: split-file %s %t +// RUN: cd %t +// +// RUN: %clang_cc1 -std=c++20 %t/a.cppm -emit-module-interface -o %t/a.pcm +// RUN: %clang_cc1 -std=c++20 %t/b.cppm -emit-module-interface -o %t/b.pcm \ +// RUN: -fmodule-file=a=%t/a.pcm +// RUN: llvm-bcanalyzer --dump --disable-histogram %t/b.pcm | FileCheck %t/b.cppm +// +// RUN: %clang_cc1 -std=c++20 %t/a.v1.cppm -emit-module-interface -o %t/a.v1.pcm +// RUN: %clang_cc1 -std=c++20 %t/b.cppm -emit-module-interface -o %t/b.v1.pcm \ +// RUN: -fmodule-file=a=%t/a.v1.pcm +// RUN: diff %t/b.pcm %t/b.v1.pcm &> /dev/null + +//--- a.cppm +export module a; +export inline int a() { + int foo = 43; + return foo; +} + +//--- b.cppm +export module b; +import a; +export inline int b() { + int foo = 43; + return foo; +} + +// CHECK: <DECL_VAR {{.*}} op5=4 + +//--- a.v1.cppm +// We remove the unused the function and testing if the format of the BMI of B will change. +export module a; + diff --git a/clang/test/Modules/no-external-type-id.cppm b/clang/test/Modules/no-external-type-id.cppm new file mode 100644 index 0000000000000..3ca50be18a7fd --- /dev/null +++ b/clang/test/Modules/no-external-type-id.cppm @@ -0,0 +1,32 @@ +// Testing that we won't record the type ID from external modules. +// +// RUN: rm -rf %t +// RUN: split-file %s %t +// RUN: cd %t +// +// RUN: %clang_cc1 -std=c++20 %t/a.cppm -emit-module-interface -o %t/a.pcm +// RUN: %clang_cc1 -std=c++20 %t/b.cppm -emit-module-interface -o %t/b.pcm \ +// RUN: -fmodule-file=a=%t/a.pcm +// RUN: llvm-bcanalyzer --dump --disable-histogram %t/b.pcm | FileCheck %t/b.cppm +// +// RUN: %clang_cc1 -std=c++20 %t/a.v1.cppm -emit-module-interface -o %t/a.v1.pcm +// RUN: %clang_cc1 -std=c++20 %t/b.cppm -emit-module-interface -o %t/b.v1.pcm \ +// RUN: -fmodule-file=a=%t/a.v1.pcm +// RUN: diff %t/b.pcm %t/b.v1.pcm &> /dev/null + +//--- a.cppm +export module a; +export int a(); + +//--- b.cppm +export module b; +import a; +export int b(); + +// CHECK: <DECL_FUNCTION {{.*}} op8=4048 +// CHECK: <TYPE_FUNCTION_PROTO + +//--- a.v1.cppm +// We remove the unused the function and testing if the format of the BMI of B will change. +export module a; + _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits